//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
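
// The concrete subclasses below implement the four region kinds declared in
// CGOpenMPRegionKind: outlined 'parallel' regions, outlined 'task' regions,
// inlined regions, and 'target' regions.
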
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};
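
// Schematically, for '#pragma omp parallel' the captured statement is emitted
// into an outlined helper of the form
//   void <HelperName>(kmp_int32 *global_tid, kmp_int32 *bound_tid,
//                     /* captured vars */...);
// which the runtime invokes through __kmpc_fork_call (see OpenMPRTLFunction
// below); ThreadIDVar designates the global_tid parameter.
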
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
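
// An untied task may be resumed by a different thread after each scheduling
// point, so the task entry must be re-enterable: UntiedTaskActionTy stores a
// part id before every scheduling point and dispatches on it at function
// entry, roughly
//   switch (*part_id) { case 0: goto .untied.jmp.0; case 1: ...; }
// getNumberOfParts() reports how many such re-entry points were emitted.
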
/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};
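
// An inlined region (e.g. '#pragma omp critical' nested in a parallel region)
// emits its body directly into the enclosing function instead of outlining a
// new one, so every query above simply defers to the enclosing region's
// CGOpenMPRegionInfo when one exists.
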
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
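
// Example: a global 'g' captured by the provided statement is privatized by
// the constructor above, so later emission of an expression mentioning 'g'
// resolves it through the private scope rather than through captured fields
// (lookup() intentionally falls back to the original variable).
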
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};
} // anonymous namespace
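
// The OpenMPLocationFlags above are OR-ed into the 'flags' field of the
// emitted ident_t; for example, the implicit barrier ending a worksharing
// loop is described by (OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR), with
// OMP_IDENT_KMPC added unconditionally by emitUpdateLocation below.
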
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**< might be used in Fortran;
///                                 see above */
///    kmp_int32 flags;        /**< also f.flags; KMP_IDENT_xxx flags;
///                                 KMP_IDENT_KMPC identifies this union
///                                 member */
///    kmp_int32 reserved_2;   /**< not really used in Fortran any more;
///                                 see above */
///#if USE_ITT_BUILD
///                            /* but currently used for storing
///                               region-specific ITT */
///                            /* contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++ */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon separated
///                                 fields which describe the source file,
///                                 the function and a pair of line numbers that
///                                 delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
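
// getOrCreateDefaultLocation below materializes this layout as a private
// constant, schematically:
//   { i32 0, i32 <flags>, i32 <reserved_2 flags>, i32 0,
//     i8* ";unknown;unknown;0;0;;" }
// When debug info is enabled, emitUpdateLocation copies it into a local
// ident_t and rewrites psource with the real ";file;function;line;column;;"
// string.
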
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
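
// For example, 'schedule(dynamic, 4)' maps to OMP_sch_dynamic_chunked and
// 'schedule(monotonic: static)' is encoded as
// OMP_sch_static | OMP_sch_modifier_monotonic; the 'ordered' clause switches
// a schedule to its OMP_ord_* counterpart.
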
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,
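
  // The entries above name host entry points implemented by libomp; the
  // __tgt_* entries that follow are implemented by the offloading runtime
  // (libomptarget) and are only emitted for offloading code.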
  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
};

/// A basic class for pre- and post-actions in the advanced codegen sequence
/// for an OpenMP region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
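
// Note that when a PrePostActionTy is attached, its Exit hook is registered
// as a NormalAndEHCleanup above, so it runs both on normal scope exit and
// when the region is unwound by an exception.
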
/// Check if the combiner is a call to UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
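
// Example: for '#pragma omp declare reduction(... : initializer(omp_priv =
// init(omp_orig)))' the initializer expression is re-emitted with omp_priv
// and omp_orig privatized to the Private and Original addresses above; with
// no initializer clause, the private copy is initialized from an emitted
// null constant instead.
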
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
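
// The function above emits, in pseudo-IR, a guarded loop of the form
//   if (dest.begin == dest.end) goto omp.arrayinit.done;
//   omp.arrayinit.body:
//     <init one element>; ++dest (and ++src for UDRs);
//     br (done ? omp.arrayinit.done : omp.arrayinit.body)
// with phi nodes carrying the current element pointers between iterations.
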
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress());
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  auto IPriv = Privates.begin();
  auto IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IPriv, *IRed);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
  SharedAddresses.emplace_back(First, Second);
}
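
// For an array-section reduction such as 'reduction(+ : a[lb:len])' both the
// lower-bound lvalue and the upper-bound lvalue are recorded, so
// emitAggregateType below can compute the element count as the pointer
// difference between them plus one.
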
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
                                     SharedAddresses[N].first.getPointer());
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}
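
// needCleanups/emitCleanups handle reduction items of non-trivially
// destructible type: e.g. a private copy of a std::string must have its
// destructor pushed as a cleanup once the private copy has been created.
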
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}
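
// adjustPrivateAddress below uses these helpers for section reductions like
// 'reduction(+ : a[2:3])': the private buffer holds only the section, so the
// private pointer is shifted back by the distance between the section begin
// and the base of 'a', giving an address that can stand in for 'a' itself.
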
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress().getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress().getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}
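
// The field order built above must stay in sync with the IdentFieldIndex enum
// (reserved_1, flags, reserved_2, reserved_3, psource); emitUpdateLocation
// indexes the record by those enumerators.
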
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return OS.str();
}
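
// Example: with FirstSeparator and Separator both ".", getName({"omp",
// "reduction"}) yields ".omp.reduction"; device variants can pass different
// separators to keep their runtime symbols distinct.
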
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}
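
// Example: '#pragma omp declare reduction(maxop : int : omp_out = omp_out >
// omp_in ? omp_out : omp_in)' produces an internal .omp_combiner. function
// whose two restrict-qualified int* parameters stand for omp_out and omp_in,
// plus an .omp_initializer. when an initializer clause is present; both are
// cached in UDRMap.
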
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill the alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}
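
// buildStructValue expects Data to provide one constant per declared field in
// declaration order; any extra elements in the LLVM struct layout (padding
// inserted between fields) are filled with null constants of the padding
// type.
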
As> 1471 static llvm::GlobalVariable * 1472 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1473 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1474 As &&... Args) { 1475 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1476 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1477 ConstantInitBuilder CIBuilder(CGM); 1478 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1479 buildStructValue(Fields, CGM, RD, RL, Data); 1480 return Fields.finishAndCreateGlobal( 1481 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1482 std::forward<As>(Args)...); 1483 } 1484 1485 template <typename T> 1486 static void 1487 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1488 ArrayRef<llvm::Constant *> Data, 1489 T &Parent) { 1490 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1491 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1492 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1493 buildStructValue(Fields, CGM, RD, RL, Data); 1494 Fields.finishAndAddTo(Parent); 1495 } 1496 1497 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1498 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1499 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1500 FlagsTy FlagsKey(Flags, Reserved2Flags); 1501 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); 1502 if (!Entry) { 1503 if (!DefaultOpenMPPSource) { 1504 // Initialize default location for psource field of ident_t structure of 1505 // all ident_t objects. Format is ";file;function;line;column;;". 1506 // Taken from 1507 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp 1508 DefaultOpenMPPSource = 1509 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 1510 DefaultOpenMPPSource = 1511 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 1512 } 1513 1514 llvm::Constant *Data[] = { 1515 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1516 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 1517 llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), 1518 llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; 1519 llvm::GlobalValue *DefaultOpenMPLocation = 1520 createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "", 1521 llvm::GlobalValue::PrivateLinkage); 1522 DefaultOpenMPLocation->setUnnamedAddr( 1523 llvm::GlobalValue::UnnamedAddr::Global); 1524 1525 OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation; 1526 } 1527 return Address(Entry, Align); 1528 } 1529 1530 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1531 bool AtCurrentPoint) { 1532 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1533 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1534 1535 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 1536 if (AtCurrentPoint) { 1537 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1538 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1539 } else { 1540 Elem.second.ServiceInsertPt = 1541 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1542 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1543 } 1544 } 1545 1546 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1547 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1548 if (Elem.second.ServiceInsertPt) { 1549 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1550 
Elem.second.ServiceInsertPt = nullptr;
1551 Ptr->eraseFromParent();
1552 }
1553 }
1554
1555 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1556 SourceLocation Loc,
1557 unsigned Flags) {
1558 Flags |= OMP_IDENT_KMPC;
1559 // If no debug info is generated, return the global default location.
1560 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1561 Loc.isInvalid())
1562 return getOrCreateDefaultLocation(Flags).getPointer();
1563
1564 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1565
1566 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1567 Address LocValue = Address::invalid();
1568 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1569 if (I != OpenMPLocThreadIDMap.end())
1570 LocValue = Address(I->second.DebugLoc, Align);
1571
1572 // OpenMPLocThreadIDMap may have a null DebugLoc and a non-null ThreadID, if
1573 // getThreadID was called before this routine.
1574 if (!LocValue.isValid()) {
1575 // Generate "ident_t .kmpc_loc.addr;"
1576 Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
1577 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1578 Elem.second.DebugLoc = AI.getPointer();
1579 LocValue = AI;
1580
1581 if (!Elem.second.ServiceInsertPt)
1582 setLocThreadIdInsertPt(CGF);
1583 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1584 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1585 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1586 CGF.getTypeSize(IdentQTy));
1587 }
1588
1589 // char **psource = &.kmpc_loc_<flags>.addr.psource;
1590 LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
1591 auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
1592 LValue PSource =
1593 CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));
1594
1595 llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1596 if (OMPDebugLoc == nullptr) {
1597 SmallString<128> Buffer2;
1598 llvm::raw_svector_ostream OS2(Buffer2);
1599 // Build the debug location.
1600 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1601 OS2 << ";" << PLoc.getFilename() << ";";
1602 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1603 OS2 << FD->getQualifiedNameAsString();
1604 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1605 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1606 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1607 }
1608 // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1609 CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);
1610
1611 // Our callers always pass this to a runtime function, so for
1612 // convenience, go ahead and return a naked pointer.
1613 return LocValue.getPointer();
1614 }
1615
1616 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1617 SourceLocation Loc) {
1618 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1619
1620 llvm::Value *ThreadID = nullptr;
1621 // Check whether we've already cached a load of the thread id in this
1622 // function.
1623 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1624 if (I != OpenMPLocThreadIDMap.end()) {
1625 ThreadID = I->second.ThreadID;
1626 if (ThreadID != nullptr)
1627 return ThreadID;
1628 }
1629 // If exceptions are enabled, do not use the parameter, to avoid a possible crash.
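// (Reading of the condition below: the gtid parameter of an enclosing
// outlined region is consulted only when doing so is safe - no landing pads
// are required in this function, exceptions are disabled, or we are still
// emitting into the entry block; otherwise we fall through to the
// __kmpc_global_thread_num call further down.)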
1630 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1631 !CGF.getLangOpts().CXXExceptions ||
1632 CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1633 if (auto *OMPRegionInfo =
1634 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1635 if (OMPRegionInfo->getThreadIDVariable()) {
1636 // Check if this is an outlined function with the thread id passed as argument.
1637 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1638 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1639 // If the value is loaded in the entry block, cache it and use it everywhere
1640 // in the function.
1641 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1642 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1643 Elem.second.ThreadID = ThreadID;
1644 }
1645 return ThreadID;
1646 }
1647 }
1648 }
1649
1650 // This is not an outlined function region - need to call kmp_int32
1651 // __kmpc_global_thread_num(ident_t *loc).
1652 // Generate the thread id value and cache it for use across the
1653 // function.
1654 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1655 if (!Elem.second.ServiceInsertPt)
1656 setLocThreadIdInsertPt(CGF);
1657 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1658 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1659 llvm::CallInst *Call = CGF.Builder.CreateCall(
1660 createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1661 emitUpdateLocation(CGF, Loc));
1662 Call->setCallingConv(CGF.getRuntimeCC());
1663 Elem.second.ThreadID = Call;
1664 return Call;
1665 }
1666
1667 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1668 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1669 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1670 clearLocThreadIdInsertPt(CGF);
1671 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1672 }
1673 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1674 for (auto *D : FunctionUDRMap[CGF.CurFn])
1675 UDRMap.erase(D);
1676 FunctionUDRMap.erase(CGF.CurFn);
1677 }
1678 }
1679
1680 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1681 return IdentTy->getPointerTo();
1682 }
1683
1684 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1685 if (!Kmpc_MicroTy) {
1686 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1687 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1688 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1689 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1690 }
1691 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1692 }
1693
1694 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1695 llvm::FunctionCallee RTLFn = nullptr;
1696 switch (static_cast<OpenMPRTLFunction>(Function)) {
1697 case OMPRTL__kmpc_fork_call: {
1698 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1699 // microtask, ...);
1700 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1701 getKmpc_MicroPointerTy()};
1702 auto *FnTy =
1703 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1704 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1705 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1706 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1707 llvm::LLVMContext &Ctx = F->getContext();
1708 llvm::MDBuilder MDB(Ctx);
1709 // Annotate the callback behavior of the __kmpc_fork_call:
1710 // - The callback callee is argument number 2 (microtask).
1711 // - The first two arguments of the callback callee are unknown (-1). 1712 // - All variadic arguments to the __kmpc_fork_call are passed to the 1713 // callback callee. 1714 F->addMetadata( 1715 llvm::LLVMContext::MD_callback, 1716 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1717 2, {-1, -1}, 1718 /* VarArgsArePassed */ true)})); 1719 } 1720 } 1721 break; 1722 } 1723 case OMPRTL__kmpc_global_thread_num: { 1724 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1725 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1726 auto *FnTy = 1727 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1728 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1729 break; 1730 } 1731 case OMPRTL__kmpc_threadprivate_cached: { 1732 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1733 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1734 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1735 CGM.VoidPtrTy, CGM.SizeTy, 1736 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1737 auto *FnTy = 1738 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1739 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1740 break; 1741 } 1742 case OMPRTL__kmpc_critical: { 1743 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1744 // kmp_critical_name *crit); 1745 llvm::Type *TypeParams[] = { 1746 getIdentTyPointerTy(), CGM.Int32Ty, 1747 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1748 auto *FnTy = 1749 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1750 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1751 break; 1752 } 1753 case OMPRTL__kmpc_critical_with_hint: { 1754 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1755 // kmp_critical_name *crit, uintptr_t hint); 1756 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1757 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1758 CGM.IntPtrTy}; 1759 auto *FnTy = 1760 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1761 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); 1762 break; 1763 } 1764 case OMPRTL__kmpc_threadprivate_register: { 1765 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1766 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1767 // typedef void *(*kmpc_ctor)(void *); 1768 auto *KmpcCtorTy = 1769 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1770 /*isVarArg*/ false)->getPointerTo(); 1771 // typedef void *(*kmpc_cctor)(void *, void *); 1772 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1773 auto *KmpcCopyCtorTy = 1774 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1775 /*isVarArg*/ false) 1776 ->getPointerTo(); 1777 // typedef void (*kmpc_dtor)(void *); 1778 auto *KmpcDtorTy = 1779 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1780 ->getPointerTo(); 1781 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1782 KmpcCopyCtorTy, KmpcDtorTy}; 1783 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1784 /*isVarArg*/ false); 1785 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1786 break; 1787 } 1788 case OMPRTL__kmpc_end_critical: { 1789 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1790 // kmp_critical_name *crit); 1791 llvm::Type *TypeParams[] = { 1792 getIdentTyPointerTy(), CGM.Int32Ty, 1793 
llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1794 auto *FnTy = 1795 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1796 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1797 break; 1798 } 1799 case OMPRTL__kmpc_cancel_barrier: { 1800 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1801 // global_tid); 1802 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1803 auto *FnTy = 1804 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1805 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1806 break; 1807 } 1808 case OMPRTL__kmpc_barrier: { 1809 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1810 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1811 auto *FnTy = 1812 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1813 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1814 break; 1815 } 1816 case OMPRTL__kmpc_for_static_fini: { 1817 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1818 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1819 auto *FnTy = 1820 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1821 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1822 break; 1823 } 1824 case OMPRTL__kmpc_push_num_threads: { 1825 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1826 // kmp_int32 num_threads) 1827 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1828 CGM.Int32Ty}; 1829 auto *FnTy = 1830 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1831 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1832 break; 1833 } 1834 case OMPRTL__kmpc_serialized_parallel: { 1835 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1836 // global_tid); 1837 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1838 auto *FnTy = 1839 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1840 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1841 break; 1842 } 1843 case OMPRTL__kmpc_end_serialized_parallel: { 1844 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1845 // global_tid); 1846 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1847 auto *FnTy = 1848 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1849 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1850 break; 1851 } 1852 case OMPRTL__kmpc_flush: { 1853 // Build void __kmpc_flush(ident_t *loc); 1854 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1855 auto *FnTy = 1856 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1857 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1858 break; 1859 } 1860 case OMPRTL__kmpc_master: { 1861 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1862 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1863 auto *FnTy = 1864 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1865 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1866 break; 1867 } 1868 case OMPRTL__kmpc_end_master: { 1869 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1870 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1871 auto *FnTy = 1872 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1873 RTLFn = 
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1874 break; 1875 } 1876 case OMPRTL__kmpc_omp_taskyield: { 1877 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1878 // int end_part); 1879 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1880 auto *FnTy = 1881 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1882 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1883 break; 1884 } 1885 case OMPRTL__kmpc_single: { 1886 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1887 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1888 auto *FnTy = 1889 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1890 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1891 break; 1892 } 1893 case OMPRTL__kmpc_end_single: { 1894 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1895 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1896 auto *FnTy = 1897 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1898 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1899 break; 1900 } 1901 case OMPRTL__kmpc_omp_task_alloc: { 1902 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1903 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1904 // kmp_routine_entry_t *task_entry); 1905 assert(KmpRoutineEntryPtrTy != nullptr && 1906 "Type kmp_routine_entry_t must be created."); 1907 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1908 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1909 // Return void * and then cast to particular kmp_task_t type. 1910 auto *FnTy = 1911 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1912 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1913 break; 1914 } 1915 case OMPRTL__kmpc_omp_task: { 1916 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1917 // *new_task); 1918 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1919 CGM.VoidPtrTy}; 1920 auto *FnTy = 1921 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1922 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 1923 break; 1924 } 1925 case OMPRTL__kmpc_copyprivate: { 1926 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 1927 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 1928 // kmp_int32 didit); 1929 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1930 auto *CpyFnTy = 1931 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 1932 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 1933 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 1934 CGM.Int32Ty}; 1935 auto *FnTy = 1936 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1937 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 1938 break; 1939 } 1940 case OMPRTL__kmpc_reduce: { 1941 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 1942 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 1943 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 1944 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1945 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1946 /*isVarArg=*/false); 1947 llvm::Type *TypeParams[] = { 1948 
getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1949 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1950 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1951 auto *FnTy =
1952 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1953 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1954 break;
1955 }
1956 case OMPRTL__kmpc_reduce_nowait: {
1957 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1958 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1959 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1960 // *lck);
1961 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1962 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1963 /*isVarArg=*/false);
1964 llvm::Type *TypeParams[] = {
1965 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1966 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1967 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1968 auto *FnTy =
1969 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1970 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1971 break;
1972 }
1973 case OMPRTL__kmpc_end_reduce: {
1974 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1975 // kmp_critical_name *lck);
1976 llvm::Type *TypeParams[] = {
1977 getIdentTyPointerTy(), CGM.Int32Ty,
1978 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1979 auto *FnTy =
1980 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1981 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1982 break;
1983 }
1984 case OMPRTL__kmpc_end_reduce_nowait: {
1985 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1986 // kmp_critical_name *lck);
1987 llvm::Type *TypeParams[] = {
1988 getIdentTyPointerTy(), CGM.Int32Ty,
1989 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1990 auto *FnTy =
1991 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1992 RTLFn =
1993 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1994 break;
1995 }
1996 case OMPRTL__kmpc_omp_task_begin_if0: {
1997 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
1998 // kmp_task_t *new_task);
1999 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2000 CGM.VoidPtrTy};
2001 auto *FnTy =
2002 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2003 RTLFn =
2004 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2005 break;
2006 }
2007 case OMPRTL__kmpc_omp_task_complete_if0: {
2008 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
2009 // kmp_task_t *new_task);
2010 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2011 CGM.VoidPtrTy};
2012 auto *FnTy =
2013 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2014 RTLFn = CGM.CreateRuntimeFunction(FnTy,
2015 /*Name=*/"__kmpc_omp_task_complete_if0");
2016 break;
2017 }
2018 case OMPRTL__kmpc_ordered: {
2019 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2020 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2021 auto *FnTy =
2022 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2023 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2024 break;
2025 }
2026 case OMPRTL__kmpc_end_ordered: {
2027 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2028 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2029 auto *FnTy =
2030 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2031 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 2032 break; 2033 } 2034 case OMPRTL__kmpc_omp_taskwait: { 2035 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 2036 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2037 auto *FnTy = 2038 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2039 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 2040 break; 2041 } 2042 case OMPRTL__kmpc_taskgroup: { 2043 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 2044 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2045 auto *FnTy = 2046 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2047 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 2048 break; 2049 } 2050 case OMPRTL__kmpc_end_taskgroup: { 2051 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 2052 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2053 auto *FnTy = 2054 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2055 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 2056 break; 2057 } 2058 case OMPRTL__kmpc_push_proc_bind: { 2059 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 2060 // int proc_bind) 2061 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2062 auto *FnTy = 2063 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2064 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 2065 break; 2066 } 2067 case OMPRTL__kmpc_omp_task_with_deps: { 2068 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2069 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2070 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 2071 llvm::Type *TypeParams[] = { 2072 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 2073 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 2074 auto *FnTy = 2075 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2076 RTLFn = 2077 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 2078 break; 2079 } 2080 case OMPRTL__kmpc_omp_wait_deps: { 2081 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2082 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2083 // kmp_depend_info_t *noalias_dep_list); 2084 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2085 CGM.Int32Ty, CGM.VoidPtrTy, 2086 CGM.Int32Ty, CGM.VoidPtrTy}; 2087 auto *FnTy = 2088 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2089 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2090 break; 2091 } 2092 case OMPRTL__kmpc_cancellationpoint: { 2093 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2094 // global_tid, kmp_int32 cncl_kind) 2095 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2096 auto *FnTy = 2097 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2098 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2099 break; 2100 } 2101 case OMPRTL__kmpc_cancel: { 2102 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2103 // kmp_int32 cncl_kind) 2104 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2105 auto *FnTy = 2106 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, 
/*isVarArg*/ false);
2107 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2108 break;
2109 }
2110 case OMPRTL__kmpc_push_num_teams: {
2111 // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
2112 // kmp_int32 num_teams, kmp_int32 num_threads)
2113 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2114 CGM.Int32Ty};
2115 auto *FnTy =
2116 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2117 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2118 break;
2119 }
2120 case OMPRTL__kmpc_fork_teams: {
2121 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2122 // microtask, ...);
2123 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2124 getKmpc_MicroPointerTy()};
2125 auto *FnTy =
2126 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2127 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2128 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2129 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2130 llvm::LLVMContext &Ctx = F->getContext();
2131 llvm::MDBuilder MDB(Ctx);
2132 // Annotate the callback behavior of the __kmpc_fork_teams:
2133 // - The callback callee is argument number 2 (microtask).
2134 // - The first two arguments of the callback callee are unknown (-1).
2135 // - All variadic arguments to the __kmpc_fork_teams are passed to the
2136 // callback callee.
2137 F->addMetadata(
2138 llvm::LLVMContext::MD_callback,
2139 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2140 2, {-1, -1},
2141 /* VarArgsArePassed */ true)}));
2142 }
2143 }
2144 break;
2145 }
2146 case OMPRTL__kmpc_taskloop: {
2147 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2148 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2149 // sched, kmp_uint64 grainsize, void *task_dup);
2150 llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2151 CGM.IntTy,
2152 CGM.VoidPtrTy,
2153 CGM.IntTy,
2154 CGM.Int64Ty->getPointerTo(),
2155 CGM.Int64Ty->getPointerTo(),
2156 CGM.Int64Ty,
2157 CGM.IntTy,
2158 CGM.IntTy,
2159 CGM.Int64Ty,
2160 CGM.VoidPtrTy};
2161 auto *FnTy =
2162 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2163 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2164 break;
2165 }
2166 case OMPRTL__kmpc_doacross_init: {
2167 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2168 // num_dims, struct kmp_dim *dims);
2169 llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2170 CGM.Int32Ty,
2171 CGM.Int32Ty,
2172 CGM.VoidPtrTy};
2173 auto *FnTy =
2174 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2175 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2176 break;
2177 }
2178 case OMPRTL__kmpc_doacross_fini: {
2179 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2180 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2181 auto *FnTy =
2182 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2183 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2184 break;
2185 }
2186 case OMPRTL__kmpc_doacross_post: {
2187 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2188 // *vec);
2189 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2190 CGM.Int64Ty->getPointerTo()};
2191 auto *FnTy =
2192 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2193 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2194 break;
2195 }
2196 case OMPRTL__kmpc_doacross_wait: {
2197 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2198 // *vec);
2199 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2200 CGM.Int64Ty->getPointerTo()};
2201 auto *FnTy =
2202 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2203 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2204 break;
2205 }
2206 case OMPRTL__kmpc_task_reduction_init: {
2207 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2208 // *data);
2209 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2210 auto *FnTy =
2211 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2212 RTLFn =
2213 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2214 break;
2215 }
2216 case OMPRTL__kmpc_task_reduction_get_th_data: {
2217 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2218 // *d);
2219 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2220 auto *FnTy =
2221 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2222 RTLFn = CGM.CreateRuntimeFunction(
2223 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2224 break;
2225 }
2226 case OMPRTL__kmpc_alloc: {
2227 // Build void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2228 // al); omp_allocator_handle_t type is void *.
2229 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2230 auto *FnTy =
2231 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2232 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2233 break;
2234 }
2235 case OMPRTL__kmpc_free: {
2236 // Build void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2237 // al); omp_allocator_handle_t type is void *.
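// (Context, for reference: __kmpc_alloc and __kmpc_free back the codegen for
// the OpenMP 5.0 'allocate' clause - a variable given a custom allocator is
// obtained via __kmpc_alloc(gtid, size, allocator) and released with a
// matching __kmpc_free(gtid, ptr, allocator).)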
2238 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2239 auto *FnTy = 2240 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2241 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); 2242 break; 2243 } 2244 case OMPRTL__kmpc_push_target_tripcount: { 2245 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 2246 // size); 2247 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; 2248 llvm::FunctionType *FnTy = 2249 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2250 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); 2251 break; 2252 } 2253 case OMPRTL__tgt_target: { 2254 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2255 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2256 // *arg_types); 2257 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2258 CGM.VoidPtrTy, 2259 CGM.Int32Ty, 2260 CGM.VoidPtrPtrTy, 2261 CGM.VoidPtrPtrTy, 2262 CGM.SizeTy->getPointerTo(), 2263 CGM.Int64Ty->getPointerTo()}; 2264 auto *FnTy = 2265 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2266 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2267 break; 2268 } 2269 case OMPRTL__tgt_target_nowait: { 2270 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2271 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 2272 // int64_t *arg_types); 2273 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2274 CGM.VoidPtrTy, 2275 CGM.Int32Ty, 2276 CGM.VoidPtrPtrTy, 2277 CGM.VoidPtrPtrTy, 2278 CGM.SizeTy->getPointerTo(), 2279 CGM.Int64Ty->getPointerTo()}; 2280 auto *FnTy = 2281 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2282 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2283 break; 2284 } 2285 case OMPRTL__tgt_target_teams: { 2286 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2287 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 2288 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2289 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2290 CGM.VoidPtrTy, 2291 CGM.Int32Ty, 2292 CGM.VoidPtrPtrTy, 2293 CGM.VoidPtrPtrTy, 2294 CGM.SizeTy->getPointerTo(), 2295 CGM.Int64Ty->getPointerTo(), 2296 CGM.Int32Ty, 2297 CGM.Int32Ty}; 2298 auto *FnTy = 2299 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2300 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2301 break; 2302 } 2303 case OMPRTL__tgt_target_teams_nowait: { 2304 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2305 // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t 2306 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2307 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2308 CGM.VoidPtrTy, 2309 CGM.Int32Ty, 2310 CGM.VoidPtrPtrTy, 2311 CGM.VoidPtrPtrTy, 2312 CGM.SizeTy->getPointerTo(), 2313 CGM.Int64Ty->getPointerTo(), 2314 CGM.Int32Ty, 2315 CGM.Int32Ty}; 2316 auto *FnTy = 2317 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2318 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2319 break; 2320 } 2321 case OMPRTL__tgt_register_requires: { 2322 // Build void __tgt_register_requires(int64_t flags); 2323 llvm::Type *TypeParams[] = {CGM.Int64Ty}; 2324 auto *FnTy = 2325 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2326 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); 2327 break; 2328 } 2329 case 
OMPRTL__tgt_register_lib: { 2330 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 2331 QualType ParamTy = 2332 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2333 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2334 auto *FnTy = 2335 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2336 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 2337 break; 2338 } 2339 case OMPRTL__tgt_unregister_lib: { 2340 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 2341 QualType ParamTy = 2342 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2343 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2344 auto *FnTy = 2345 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2346 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 2347 break; 2348 } 2349 case OMPRTL__tgt_target_data_begin: { 2350 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2351 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2352 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2353 CGM.Int32Ty, 2354 CGM.VoidPtrPtrTy, 2355 CGM.VoidPtrPtrTy, 2356 CGM.SizeTy->getPointerTo(), 2357 CGM.Int64Ty->getPointerTo()}; 2358 auto *FnTy = 2359 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2360 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2361 break; 2362 } 2363 case OMPRTL__tgt_target_data_begin_nowait: { 2364 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2365 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2366 // *arg_types); 2367 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2368 CGM.Int32Ty, 2369 CGM.VoidPtrPtrTy, 2370 CGM.VoidPtrPtrTy, 2371 CGM.SizeTy->getPointerTo(), 2372 CGM.Int64Ty->getPointerTo()}; 2373 auto *FnTy = 2374 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2375 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2376 break; 2377 } 2378 case OMPRTL__tgt_target_data_end: { 2379 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2380 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2381 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2382 CGM.Int32Ty, 2383 CGM.VoidPtrPtrTy, 2384 CGM.VoidPtrPtrTy, 2385 CGM.SizeTy->getPointerTo(), 2386 CGM.Int64Ty->getPointerTo()}; 2387 auto *FnTy = 2388 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2389 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2390 break; 2391 } 2392 case OMPRTL__tgt_target_data_end_nowait: { 2393 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2394 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2395 // *arg_types); 2396 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2397 CGM.Int32Ty, 2398 CGM.VoidPtrPtrTy, 2399 CGM.VoidPtrPtrTy, 2400 CGM.SizeTy->getPointerTo(), 2401 CGM.Int64Ty->getPointerTo()}; 2402 auto *FnTy = 2403 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2404 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2405 break; 2406 } 2407 case OMPRTL__tgt_target_data_update: { 2408 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2409 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2410 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2411 CGM.Int32Ty, 2412 CGM.VoidPtrPtrTy, 2413 CGM.VoidPtrPtrTy, 2414 CGM.SizeTy->getPointerTo(), 2415 
CGM.Int64Ty->getPointerTo()}; 2416 auto *FnTy = 2417 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2418 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2419 break; 2420 } 2421 case OMPRTL__tgt_target_data_update_nowait: { 2422 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2423 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2424 // *arg_types); 2425 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2426 CGM.Int32Ty, 2427 CGM.VoidPtrPtrTy, 2428 CGM.VoidPtrPtrTy, 2429 CGM.SizeTy->getPointerTo(), 2430 CGM.Int64Ty->getPointerTo()}; 2431 auto *FnTy = 2432 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2433 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2434 break; 2435 } 2436 } 2437 assert(RTLFn && "Unable to find OpenMP runtime function"); 2438 return RTLFn; 2439 } 2440 2441 llvm::FunctionCallee 2442 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 2443 assert((IVSize == 32 || IVSize == 64) && 2444 "IV size is not compatible with the omp runtime"); 2445 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2446 : "__kmpc_for_static_init_4u") 2447 : (IVSigned ? "__kmpc_for_static_init_8" 2448 : "__kmpc_for_static_init_8u"); 2449 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2450 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2451 llvm::Type *TypeParams[] = { 2452 getIdentTyPointerTy(), // loc 2453 CGM.Int32Ty, // tid 2454 CGM.Int32Ty, // schedtype 2455 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2456 PtrTy, // p_lower 2457 PtrTy, // p_upper 2458 PtrTy, // p_stride 2459 ITy, // incr 2460 ITy // chunk 2461 }; 2462 auto *FnTy = 2463 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2464 return CGM.CreateRuntimeFunction(FnTy, Name); 2465 } 2466 2467 llvm::FunctionCallee 2468 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 2469 assert((IVSize == 32 || IVSize == 64) && 2470 "IV size is not compatible with the omp runtime"); 2471 StringRef Name = 2472 IVSize == 32 2473 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2474 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2475 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2476 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2477 CGM.Int32Ty, // tid 2478 CGM.Int32Ty, // schedtype 2479 ITy, // lower 2480 ITy, // upper 2481 ITy, // stride 2482 ITy // chunk 2483 }; 2484 auto *FnTy = 2485 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2486 return CGM.CreateRuntimeFunction(FnTy, Name); 2487 } 2488 2489 llvm::FunctionCallee 2490 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 2491 assert((IVSize == 32 || IVSize == 64) && 2492 "IV size is not compatible with the omp runtime"); 2493 StringRef Name = 2494 IVSize == 32 2495 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2496 : (IVSigned ? 
"__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2497 llvm::Type *TypeParams[] = { 2498 getIdentTyPointerTy(), // loc 2499 CGM.Int32Ty, // tid 2500 }; 2501 auto *FnTy = 2502 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2503 return CGM.CreateRuntimeFunction(FnTy, Name); 2504 } 2505 2506 llvm::FunctionCallee 2507 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 2508 assert((IVSize == 32 || IVSize == 64) && 2509 "IV size is not compatible with the omp runtime"); 2510 StringRef Name = 2511 IVSize == 32 2512 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2513 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2514 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2515 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2516 llvm::Type *TypeParams[] = { 2517 getIdentTyPointerTy(), // loc 2518 CGM.Int32Ty, // tid 2519 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2520 PtrTy, // p_lower 2521 PtrTy, // p_upper 2522 PtrTy // p_stride 2523 }; 2524 auto *FnTy = 2525 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2526 return CGM.CreateRuntimeFunction(FnTy, Name); 2527 } 2528 2529 Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) { 2530 if (CGM.getLangOpts().OpenMPSimd) 2531 return Address::invalid(); 2532 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2533 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2534 if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) { 2535 SmallString<64> PtrName; 2536 { 2537 llvm::raw_svector_ostream OS(PtrName); 2538 OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr"; 2539 } 2540 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2541 if (!Ptr) { 2542 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2543 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2544 PtrName); 2545 if (!CGM.getLangOpts().OpenMPIsDevice) { 2546 auto *GV = cast<llvm::GlobalVariable>(Ptr); 2547 GV->setLinkage(llvm::GlobalValue::ExternalLinkage); 2548 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 2549 } 2550 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr)); 2551 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 2552 } 2553 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 2554 } 2555 return Address::invalid(); 2556 } 2557 2558 llvm::Constant * 2559 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2560 assert(!CGM.getLangOpts().OpenMPUseTLS || 2561 !CGM.getContext().getTargetInfo().isTLSSupported()); 2562 // Lookup the entry, lazily creating it if necessary. 
2563 std::string Suffix = getName({"cache", ""}); 2564 return getOrCreateInternalVariable( 2565 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 2566 } 2567 2568 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2569 const VarDecl *VD, 2570 Address VDAddr, 2571 SourceLocation Loc) { 2572 if (CGM.getLangOpts().OpenMPUseTLS && 2573 CGM.getContext().getTargetInfo().isTLSSupported()) 2574 return VDAddr; 2575 2576 llvm::Type *VarTy = VDAddr.getElementType(); 2577 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2578 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2579 CGM.Int8PtrTy), 2580 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2581 getOrCreateThreadPrivateCache(VD)}; 2582 return Address(CGF.EmitRuntimeCall( 2583 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2584 VDAddr.getAlignment()); 2585 } 2586 2587 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2588 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2589 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2590 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2591 // library. 2592 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 2593 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2594 OMPLoc); 2595 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2596 // to register constructor/destructor for variable. 2597 llvm::Value *Args[] = { 2598 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 2599 Ctor, CopyCtor, Dtor}; 2600 CGF.EmitRuntimeCall( 2601 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 2602 } 2603 2604 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 2605 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 2606 bool PerformInit, CodeGenFunction *CGF) { 2607 if (CGM.getLangOpts().OpenMPUseTLS && 2608 CGM.getContext().getTargetInfo().isTLSSupported()) 2609 return nullptr; 2610 2611 VD = VD->getDefinition(CGM.getContext()); 2612 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 2613 QualType ASTTy = VD->getType(); 2614 2615 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 2616 const Expr *Init = VD->getAnyInitializer(); 2617 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2618 // Generate function that re-emits the declaration's initializer into the 2619 // threadprivate copy of the variable VD 2620 CodeGenFunction CtorCGF(CGM); 2621 FunctionArgList Args; 2622 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2623 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2624 ImplicitParamDecl::Other); 2625 Args.push_back(&Dst); 2626 2627 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2628 CGM.getContext().VoidPtrTy, Args); 2629 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2630 std::string Name = getName({"__kmpc_global_ctor_", ""}); 2631 llvm::Function *Fn = 2632 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2633 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 2634 Args, Loc, Loc); 2635 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 2636 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2637 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2638 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 2639 Arg = CtorCGF.Builder.CreateElementBitCast( 2640 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 2641 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 
2642 /*IsInitializer=*/true);
2643 ArgVal = CtorCGF.EmitLoadOfScalar(
2644 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2645 CGM.getContext().VoidPtrTy, Dst.getLocation());
2646 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2647 CtorCGF.FinishFunction();
2648 Ctor = Fn;
2649 }
2650 if (VD->getType().isDestructedType() != QualType::DK_none) {
2651 // Generate a function that emits the destructor call for the threadprivate
2652 // copy of the variable VD.
2653 CodeGenFunction DtorCGF(CGM);
2654 FunctionArgList Args;
2655 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2656 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2657 ImplicitParamDecl::Other);
2658 Args.push_back(&Dst);
2659
2660 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2661 CGM.getContext().VoidTy, Args);
2662 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2663 std::string Name = getName({"__kmpc_global_dtor_", ""});
2664 llvm::Function *Fn =
2665 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2666 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2667 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2668 Loc, Loc);
2669 // Create a scope with an artificial location for the body of this function.
2670 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2671 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
2672 DtorCGF.GetAddrOfLocalVar(&Dst),
2673 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2674 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2675 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2676 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2677 DtorCGF.FinishFunction();
2678 Dtor = Fn;
2679 }
2680 // Do not emit the init function if it is not required.
2681 if (!Ctor && !Dtor)
2682 return nullptr;
2683
2684 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2685 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2686 /*isVarArg=*/false)
2687 ->getPointerTo();
2688 // Copying constructor for the threadprivate variable.
2689 // Must be NULL: this parameter is reserved by the runtime, which currently
2690 // requires it to always be NULL and asserts otherwise.
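// (Likewise, if only one of the ctor/dtor pair was emitted above, the
// remaining slot below is filled with a typed null function pointer, which
// the runtime treats as "nothing to do".)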
2691 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 2692 if (Ctor == nullptr) { 2693 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 2694 /*isVarArg=*/false) 2695 ->getPointerTo(); 2696 Ctor = llvm::Constant::getNullValue(CtorTy); 2697 } 2698 if (Dtor == nullptr) { 2699 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 2700 /*isVarArg=*/false) 2701 ->getPointerTo(); 2702 Dtor = llvm::Constant::getNullValue(DtorTy); 2703 } 2704 if (!CGF) { 2705 auto *InitFunctionTy = 2706 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 2707 std::string Name = getName({"__omp_threadprivate_init_", ""}); 2708 llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction( 2709 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 2710 CodeGenFunction InitCGF(CGM); 2711 FunctionArgList ArgList; 2712 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 2713 CGM.getTypes().arrangeNullaryFunction(), ArgList, 2714 Loc, Loc); 2715 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2716 InitCGF.FinishFunction(); 2717 return InitFunction; 2718 } 2719 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2720 } 2721 return nullptr; 2722 } 2723 2724 /// Obtain information that uniquely identifies a target entry. This 2725 /// consists of the file and device IDs as well as line number associated with 2726 /// the relevant entry source location. 2727 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 2728 unsigned &DeviceID, unsigned &FileID, 2729 unsigned &LineNum) { 2730 SourceManager &SM = C.getSourceManager(); 2731 2732 // The loc should be always valid and have a file ID (the user cannot use 2733 // #pragma directives in macros) 2734 2735 assert(Loc.isValid() && "Source location is expected to be always valid."); 2736 2737 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 2738 assert(PLoc.isValid() && "Source location is expected to be always valid."); 2739 2740 llvm::sys::fs::UniqueID ID; 2741 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 2742 SM.getDiagnostics().Report(diag::err_cannot_open_file) 2743 << PLoc.getFilename() << EC.message(); 2744 2745 DeviceID = ID.getDevice(); 2746 FileID = ID.getFile(); 2747 LineNum = PLoc.getLine(); 2748 } 2749 2750 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 2751 llvm::GlobalVariable *Addr, 2752 bool PerformInit) { 2753 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2754 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2755 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) 2756 return CGM.getLangOpts().OpenMPIsDevice; 2757 VD = VD->getDefinition(CGM.getContext()); 2758 if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 2759 return CGM.getLangOpts().OpenMPIsDevice; 2760 2761 QualType ASTTy = VD->getType(); 2762 2763 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 2764 // Produce the unique prefix to identify the new target regions. We use 2765 // the source location of the variable declaration which we know to not 2766 // conflict with any target region. 
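// (For illustration: the prefix built below concatenates "__omp_offloading"
// with the device and file IDs in hex, the variable name, and "_l<line>";
// the entries registered further down append "_ctor" / "_dtor" to it.)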
2767 unsigned DeviceID; 2768 unsigned FileID; 2769 unsigned Line; 2770 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 2771 SmallString<128> Buffer, Out; 2772 { 2773 llvm::raw_svector_ostream OS(Buffer); 2774 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 2775 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 2776 } 2777 2778 const Expr *Init = VD->getAnyInitializer(); 2779 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2780 llvm::Constant *Ctor; 2781 llvm::Constant *ID; 2782 if (CGM.getLangOpts().OpenMPIsDevice) { 2783 // Generate function that re-emits the declaration's initializer into 2784 // the threadprivate copy of the variable VD 2785 CodeGenFunction CtorCGF(CGM); 2786 2787 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2788 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2789 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2790 FTy, Twine(Buffer, "_ctor"), FI, Loc); 2791 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 2792 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2793 FunctionArgList(), Loc, Loc); 2794 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 2795 CtorCGF.EmitAnyExprToMem(Init, 2796 Address(Addr, CGM.getContext().getDeclAlign(VD)), 2797 Init->getType().getQualifiers(), 2798 /*IsInitializer=*/true); 2799 CtorCGF.FinishFunction(); 2800 Ctor = Fn; 2801 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2802 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 2803 } else { 2804 Ctor = new llvm::GlobalVariable( 2805 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2806 llvm::GlobalValue::PrivateLinkage, 2807 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 2808 ID = Ctor; 2809 } 2810 2811 // Register the information for the entry associated with the constructor. 2812 Out.clear(); 2813 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2814 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 2815 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 2816 } 2817 if (VD->getType().isDestructedType() != QualType::DK_none) { 2818 llvm::Constant *Dtor; 2819 llvm::Constant *ID; 2820 if (CGM.getLangOpts().OpenMPIsDevice) { 2821 // Generate function that emits destructor call for the threadprivate 2822 // copy of the variable VD 2823 CodeGenFunction DtorCGF(CGM); 2824 2825 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2826 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2827 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2828 FTy, Twine(Buffer, "_dtor"), FI, Loc); 2829 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2830 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2831 FunctionArgList(), Loc, Loc); 2832 // Create a scope with an artificial location for the body of this 2833 // function. 
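// (The destructor body below destroys the declare-target global 'Addr' in
// place via emitDestroy; unlike the threadprivate case earlier in this file,
// no argument is passed - the variable's address is known statically.)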
2834 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2835 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 2836 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2837 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2838 DtorCGF.FinishFunction(); 2839 Dtor = Fn; 2840 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2841 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 2842 } else { 2843 Dtor = new llvm::GlobalVariable( 2844 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2845 llvm::GlobalValue::PrivateLinkage, 2846 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 2847 ID = Dtor; 2848 } 2849 // Register the information for the entry associated with the destructor. 2850 Out.clear(); 2851 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2852 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2853 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2854 } 2855 return CGM.getLangOpts().OpenMPIsDevice; 2856 } 2857 2858 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2859 QualType VarType, 2860 StringRef Name) { 2861 std::string Suffix = getName({"artificial", ""}); 2862 std::string CacheSuffix = getName({"cache", ""}); 2863 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2864 llvm::Value *GAddr = 2865 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2866 llvm::Value *Args[] = { 2867 emitUpdateLocation(CGF, SourceLocation()), 2868 getThreadID(CGF, SourceLocation()), 2869 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2870 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2871 /*IsSigned=*/false), 2872 getOrCreateInternalVariable( 2873 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2874 return Address( 2875 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2876 CGF.EmitRuntimeCall( 2877 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2878 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2879 CGM.getPointerAlign()); 2880 } 2881 2882 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 2883 const RegionCodeGenTy &ThenGen, 2884 const RegionCodeGenTy &ElseGen) { 2885 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2886 2887 // If the condition constant folds and can be elided, try to avoid emitting 2888 // the condition and the dead arm of the if/else. 2889 bool CondConstant; 2890 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2891 if (CondConstant) 2892 ThenGen(CGF); 2893 else 2894 ElseGen(CGF); 2895 return; 2896 } 2897 2898 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2899 // emit the conditional branch. 2900 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2901 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2902 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2903 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2904 2905 // Emit the 'then' code. 2906 CGF.EmitBlock(ThenBlock); 2907 ThenGen(CGF); 2908 CGF.EmitBranch(ContBlock); 2909 // Emit the 'else' code if present. 2910 // There is no need to emit line number for unconditional branch. 2911 (void)ApplyDebugLocation::CreateEmpty(CGF); 2912 CGF.EmitBlock(ElseBlock); 2913 ElseGen(CGF); 2914 // There is no need to emit line number for unconditional branch. 
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&gtid, &zero, CapturedStruct);
    Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                                        /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
// region but in regular serial code, get the thread ID by calling
// kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash that thread ID in a
// temporary and return the address of the temporary.
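// A sketch (assumed shape, not verbatim output) of the IR emitted in the
// serial-code case:
//   %.threadid_temp. = alloca i32
//   %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @loc)
//   store i32 %0, i32* %.threadid_temp.
// The returned Address points at %.threadid_temp..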
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
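/// As a sketch of the pattern this class implements (the exact runtime entry
/// points vary per construct; see the callers below):
///   <EnterCallee>(EnterArgs);   // e.g. __kmpc_critical(...)
///   <region body>
///   <ExitCallee>(ExitArgs);     // e.g. __kmpc_end_critical(...)
/// With Conditional == true, the body and the exit call are only executed
/// when the enter call returns a non-zero value.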
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
  }
  CommonActionTy Action(
      createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
                                 : OMPRTL__kmpc_critical),
      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  //                         <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  //                         <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all
    // other threads in the corresponding parallel region.
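    // In C-like pseudo-code (a sketch), the list built above is
    //   void *cpr_list[n] = { &var0, ..., &var(n-1) };
    // and the helper emitted by emitCopyprivateCopyFunction copies element I
    // from the source thread's list into the destination thread's list via
    // the per-variable assignment ops.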
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
                          createRuntimeFunction(OMPRTL__kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose a schedule(static, 1) schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked
                 : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  return Schedule | Modifier;
}

void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                // Lower
      DispatchValues.UB,                                // Upper
      CGF.Builder.getIntN(IVSize, 1),                   // Stride
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    isOpenMPLoopDirective(DKind)
                                                        ? OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
                      Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
                      Args);
}

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         OpenMPProcBindClauseKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Constants for proc bind value accepted by the runtime.
  enum ProcBindTy {
    ProcBindFalse = 0,
    ProcBindTrue,
    ProcBindMaster,
    ProcBindClose,
    ProcBindSpread,
    ProcBindIntel,
    ProcBindDefault
  } RuntimeProcBind;
  switch (ProcBind) {
  case OMPC_PROC_BIND_master:
    RuntimeProcBind = ProcBindMaster;
    break;
  case OMPC_PROC_BIND_close:
    RuntimeProcBind = ProcBindClose;
    break;
  case OMPC_PROC_BIND_spread:
    RuntimeProcBind = ProcBindSpread;
    break;
  case OMPC_PROC_BIND_unknown:
    llvm_unreachable("Unsupported proc_bind value.");
  }
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}

void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call void __kmpc_flush(ident_t *loc)
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
                      emitUpdateLocation(CGF, Loc));
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized;
  // it only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName,
    unsigned LineNum) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (PerLine->second.getAddress() || PerLine->second.getID())
    return false;
  return true;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  llvm::Module &M = CGM.getModule();
  ASTContext &C = CGM.getContext();

  // Get list of devices we care about
  const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;

  // We should be creating an offloading descriptor only if there are devices
  // specified.
  assert(!Devices.empty() && "No OpenMP offloading devices??");

  // Create the external variables that will point to the begin and end of the
  // host entries section. These will be defined by the linker.
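  // Conceptually (a sketch of the intent, not verbatim symbol names), the
  // host side ends up with two linker-defined externals bracketing the
  // entries section that createOffloadEntry() populates:
  //   extern __tgt_offload_entry <entries_begin>;
  //   extern __tgt_offload_entry <entries_end>;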
  llvm::Type *OffloadEntryTy =
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
  std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
  auto *HostEntriesBegin = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      EntriesBeginName);
  std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
  auto *HostEntriesEnd =
      new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
                               llvm::GlobalValue::ExternalLinkage,
                               /*Initializer=*/nullptr, EntriesEndName);

  // Create all device images.
  auto *DeviceImageTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
  ConstantInitBuilder DeviceImagesBuilder(CGM);
  ConstantArrayBuilder DeviceImagesEntries =
      DeviceImagesBuilder.beginArray(DeviceImageTy);

  for (const llvm::Triple &Device : Devices) {
    StringRef T = Device.getTriple();
    std::string BeginName = getName({"omp_offloading", "img_start", ""});
    auto *ImgBegin = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(BeginName).concat(T));
    std::string EndName = getName({"omp_offloading", "img_end", ""});
    auto *ImgEnd = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(EndName).concat(T));

    llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
                              HostEntriesEnd};
    createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
                                             DeviceImagesEntries);
  }

  // Create device images global array.
  std::string ImagesName = getName({"omp_offloading", "device_images"});
  llvm::GlobalVariable *DeviceImages =
      DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
                                                CGM.getPointerAlign(),
                                                /*isConstant=*/true);
  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // This is a zero array used in the creation of the GEP constant expressions.
  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
                             llvm::Constant::getNullValue(CGM.Int32Ty)};

  // Create the target region descriptor.
  llvm::Constant *Data[] = {
      llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
      llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
                                           DeviceImages, Index),
      HostEntriesBegin, HostEntriesEnd};
  std::string Descriptor = getName({"omp_offloading", "descriptor"});
  llvm::GlobalVariable *Desc = createGlobalStruct(
      CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);

  // Emit code to register or unregister the descriptor at execution
  // startup or closing, respectively.

  llvm::Function *UnRegFn;
  {
    FunctionArgList Args;
    ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
    Args.push_back(&DummyPtr);

    CodeGenFunction CGF(CGM);
    // Disable debug info for global (de-)initializer because they are not part
    // of some particular construct.
    CGF.disableDebugInfo();
    const auto &FI =
        CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
    UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
                        Desc);
    CGF.FinishFunction();
  }
  llvm::Function *RegFn;
  {
    CodeGenFunction CGF(CGM);
    // Disable debug info for global (de-)initializer because they are not part
    // of some particular construct.
    CGF.disableDebugInfo();
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);

    // Encode offload target triples into the registration function name. It
    // will serve as a comdat key for the registration/unregistration code for
    // this particular combination of offloading targets.
    SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
    RegFnNameParts[0] = "omp_offloading";
    RegFnNameParts[1] = "descriptor_reg";
    llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
                    [](const llvm::Triple &T) -> const std::string & {
                      return T.getTriple();
                    });
    llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
    std::string Descriptor = getName(RegFnNameParts);
    RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
    // Create a variable to drive the registration and unregistration of the
    // descriptor, so we can reuse the logic that emits Ctors and Dtors.
    ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
                                  SourceLocation(), nullptr, C.CharTy,
                                  ImplicitParamDecl::Other);
    CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
    CGF.FinishFunction();
  }
  if (CGM.supportsCOMDAT()) {
    // It is sufficient to call registration function only once, so create a
    // COMDAT group for registration/unregistration functions and associated
    // data. That would reduce startup time and code size. Registration
    // function serves as a COMDAT group key.
    llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
    RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
    RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
    RegFn->setComdat(ComdatKey);
    UnRegFn->setComdat(ComdatKey);
    DeviceImages->setComdat(ComdatKey);
    Desc->setComdat(ComdatKey);
  }
  return RegFn;
}

void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
                            llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
                            llvm::ConstantInt::get(CGM.SizeTy, Size),
                            llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                            llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  std::string Section = getName({"omp_offloading", "entries"});
  Entry->setSection(Section);
}

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we do not have entries, we don't need to do anything.
  if (OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        //   identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
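        // For instance (a sketch of the operand layout, not verbatim output):
        //   !{i32 0, i32 <device-id>, i32 <file-id>, !"<parent-name>",
        //     i32 <line>, i32 <order>}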
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  for (const auto *E : OrderedEntries) {
    assert(E && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                E)) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(DiagID);
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE =
                   dyn_cast<OffloadEntriesInfoManagerTy::
                                OffloadEntryInfoDeviceGlobalVar>(E)) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declare target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in
  // createOffloadEntriesAndInfoMetadata().
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4281 ASTContext &C = CGM.getContext(); 4282 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 4283 FunctionProtoType::ExtProtoInfo EPI; 4284 KmpRoutineEntryPtrQTy = C.getPointerType( 4285 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 4286 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 4287 } 4288 } 4289 4290 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 4291 // Make sure the type of the entry is already created. This is the type we 4292 // have to create: 4293 // struct __tgt_offload_entry{ 4294 // void *addr; // Pointer to the offload entry info. 4295 // // (function or global) 4296 // char *name; // Name of the function or global. 4297 // size_t size; // Size of the entry info (0 if it is a function). 4298 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 4299 // int32_t reserved; // Reserved, to be used by the runtime library. 4300 // }; 4301 if (TgtOffloadEntryQTy.isNull()) { 4302 ASTContext &C = CGM.getContext(); 4303 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 4304 RD->startDefinition(); 4305 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4306 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 4307 addFieldToRecordDecl(C, RD, C.getSizeType()); 4308 addFieldToRecordDecl( 4309 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4310 addFieldToRecordDecl( 4311 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4312 RD->completeDefinition(); 4313 RD->addAttr(PackedAttr::CreateImplicit(C)); 4314 TgtOffloadEntryQTy = C.getRecordType(RD); 4315 } 4316 return TgtOffloadEntryQTy; 4317 } 4318 4319 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { 4320 // These are the types we need to build: 4321 // struct __tgt_device_image{ 4322 // void *ImageStart; // Pointer to the target code start. 4323 // void *ImageEnd; // Pointer to the target code end. 4324 // // We also add the host entries to the device image, as it may be useful 4325 // // for the target runtime to have access to that information. 4326 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all 4327 // // the entries. 4328 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4329 // // entries (non inclusive). 4330 // }; 4331 if (TgtDeviceImageQTy.isNull()) { 4332 ASTContext &C = CGM.getContext(); 4333 RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image"); 4334 RD->startDefinition(); 4335 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4336 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4337 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4338 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4339 RD->completeDefinition(); 4340 TgtDeviceImageQTy = C.getRecordType(RD); 4341 } 4342 return TgtDeviceImageQTy; 4343 } 4344 4345 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { 4346 // struct __tgt_bin_desc{ 4347 // int32_t NumDevices; // Number of devices supported. 4348 // __tgt_device_image *DeviceImages; // Array of device images 4349 // // (one per device). 4350 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the 4351 // // entries. 4352 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4353 // // entries (non inclusive).
4354 // }; 4355 if (TgtBinaryDescriptorQTy.isNull()) { 4356 ASTContext &C = CGM.getContext(); 4357 RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc"); 4358 RD->startDefinition(); 4359 addFieldToRecordDecl( 4360 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4361 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); 4362 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4363 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4364 RD->completeDefinition(); 4365 TgtBinaryDescriptorQTy = C.getRecordType(RD); 4366 } 4367 return TgtBinaryDescriptorQTy; 4368 } 4369 4370 namespace { 4371 struct PrivateHelpersTy { 4372 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 4373 const VarDecl *PrivateElemInit) 4374 : Original(Original), PrivateCopy(PrivateCopy), 4375 PrivateElemInit(PrivateElemInit) {} 4376 const VarDecl *Original; 4377 const VarDecl *PrivateCopy; 4378 const VarDecl *PrivateElemInit; 4379 }; 4380 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 4381 } // anonymous namespace 4382 4383 static RecordDecl * 4384 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 4385 if (!Privates.empty()) { 4386 ASTContext &C = CGM.getContext(); 4387 // Build struct .kmp_privates_t. { 4388 // /* private vars */ 4389 // }; 4390 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 4391 RD->startDefinition(); 4392 for (const auto &Pair : Privates) { 4393 const VarDecl *VD = Pair.second.Original; 4394 QualType Type = VD->getType().getNonReferenceType(); 4395 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 4396 if (VD->hasAttrs()) { 4397 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 4398 E(VD->getAttrs().end()); 4399 I != E; ++I) 4400 FD->addAttr(*I); 4401 } 4402 } 4403 RD->completeDefinition(); 4404 return RD; 4405 } 4406 return nullptr; 4407 } 4408 4409 static RecordDecl * 4410 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 4411 QualType KmpInt32Ty, 4412 QualType KmpRoutineEntryPointerQTy) { 4413 ASTContext &C = CGM.getContext(); 4414 // Build struct kmp_task_t { 4415 // void * shareds; 4416 // kmp_routine_entry_t routine; 4417 // kmp_int32 part_id; 4418 // kmp_cmplrdata_t data1; 4419 // kmp_cmplrdata_t data2; 4420 // For taskloops additional fields: 4421 // kmp_uint64 lb; 4422 // kmp_uint64 ub; 4423 // kmp_int64 st; 4424 // kmp_int32 liter; 4425 // void * reductions; 4426 // }; 4427 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 4428 UD->startDefinition(); 4429 addFieldToRecordDecl(C, UD, KmpInt32Ty); 4430 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 4431 UD->completeDefinition(); 4432 QualType KmpCmplrdataTy = C.getRecordType(UD); 4433 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 4434 RD->startDefinition(); 4435 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4436 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 4437 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4438 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4439 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4440 if (isOpenMPTaskLoopDirective(Kind)) { 4441 QualType KmpUInt64Ty = 4442 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 4443 QualType KmpInt64Ty = 4444 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 4445 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4446 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4447 addFieldToRecordDecl(C, RD, KmpInt64Ty); 4448 
addFieldToRecordDecl(C, RD, KmpInt32Ty); 4449 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4450 } 4451 RD->completeDefinition(); 4452 return RD; 4453 } 4454 4455 static RecordDecl * 4456 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 4457 ArrayRef<PrivateDataTy> Privates) { 4458 ASTContext &C = CGM.getContext(); 4459 // Build struct kmp_task_t_with_privates { 4460 // kmp_task_t task_data; 4461 // .kmp_privates_t. privates; 4462 // }; 4463 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 4464 RD->startDefinition(); 4465 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 4466 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 4467 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 4468 RD->completeDefinition(); 4469 return RD; 4470 } 4471 4472 /// Emit a proxy function which accepts kmp_task_t as the second 4473 /// argument. 4474 /// \code 4475 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 4476 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 4477 /// For taskloops: 4478 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4479 /// tt->reductions, tt->shareds); 4480 /// return 0; 4481 /// } 4482 /// \endcode 4483 static llvm::Function * 4484 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 4485 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 4486 QualType KmpTaskTWithPrivatesPtrQTy, 4487 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 4488 QualType SharedsPtrTy, llvm::Function *TaskFunction, 4489 llvm::Value *TaskPrivatesMap) { 4490 ASTContext &C = CGM.getContext(); 4491 FunctionArgList Args; 4492 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4493 ImplicitParamDecl::Other); 4494 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4495 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4496 ImplicitParamDecl::Other); 4497 Args.push_back(&GtidArg); 4498 Args.push_back(&TaskTypeArg); 4499 const auto &TaskEntryFnInfo = 4500 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4501 llvm::FunctionType *TaskEntryTy = 4502 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 4503 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 4504 auto *TaskEntry = llvm::Function::Create( 4505 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4506 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 4507 TaskEntry->setDoesNotRecurse(); 4508 CodeGenFunction CGF(CGM); 4509 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 4510 Loc, Loc); 4511 4512 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 4513 // tt, 4514 // For taskloops: 4515 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4516 // tt->task_data.shareds); 4517 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 4518 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 4519 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4520 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4521 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4522 const auto *KmpTaskTWithPrivatesQTyRD = 4523 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4524 LValue Base = 4525 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4526 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4527 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 4528 
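// KmpTaskTPartId and the other field enumerators used below are indices into
// kmp_task_t in the order the fields were added by createKmpTaskTRecordDecl()
// above: shareds, routine, part_id, data1, data2, then (taskloop only) lb,
// ub, st, liter, reductions.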
LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 4529 llvm::Value *PartidParam = PartIdLVal.getPointer(); 4530 4531 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 4532 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 4533 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4534 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 4535 CGF.ConvertTypeForMem(SharedsPtrTy)); 4536 4537 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4538 llvm::Value *PrivatesParam; 4539 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 4540 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 4541 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4542 PrivatesLVal.getPointer(), CGF.VoidPtrTy); 4543 } else { 4544 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4545 } 4546 4547 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 4548 TaskPrivatesMap, 4549 CGF.Builder 4550 .CreatePointerBitCastOrAddrSpaceCast( 4551 TDBase.getAddress(), CGF.VoidPtrTy) 4552 .getPointer()}; 4553 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 4554 std::end(CommonArgs)); 4555 if (isOpenMPTaskLoopDirective(Kind)) { 4556 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 4557 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 4558 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 4559 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 4560 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 4561 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 4562 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 4563 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 4564 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 4565 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4566 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4567 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 4568 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 4569 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 4570 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 4571 CallArgs.push_back(LBParam); 4572 CallArgs.push_back(UBParam); 4573 CallArgs.push_back(StParam); 4574 CallArgs.push_back(LIParam); 4575 CallArgs.push_back(RParam); 4576 } 4577 CallArgs.push_back(SharedsParam); 4578 4579 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 4580 CallArgs); 4581 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 4582 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 4583 CGF.FinishFunction(); 4584 return TaskEntry; 4585 } 4586 4587 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 4588 SourceLocation Loc, 4589 QualType KmpInt32Ty, 4590 QualType KmpTaskTWithPrivatesPtrQTy, 4591 QualType KmpTaskTWithPrivatesQTy) { 4592 ASTContext &C = CGM.getContext(); 4593 FunctionArgList Args; 4594 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4595 ImplicitParamDecl::Other); 4596 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4597 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4598 ImplicitParamDecl::Other); 4599 Args.push_back(&GtidArg); 4600 Args.push_back(&TaskTypeArg); 4601 const auto &DestructorFnInfo = 4602 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4603 llvm::FunctionType *DestructorFnTy = 4604 
CGM.getTypes().GetFunctionType(DestructorFnInfo); 4605 std::string Name = 4606 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 4607 auto *DestructorFn = 4608 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 4609 Name, &CGM.getModule()); 4610 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 4611 DestructorFnInfo); 4612 DestructorFn->setDoesNotRecurse(); 4613 CodeGenFunction CGF(CGM); 4614 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 4615 Args, Loc, Loc); 4616 4617 LValue Base = CGF.EmitLoadOfPointerLValue( 4618 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4619 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4620 const auto *KmpTaskTWithPrivatesQTyRD = 4621 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4622 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4623 Base = CGF.EmitLValueForField(Base, *FI); 4624 for (const auto *Field : 4625 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 4626 if (QualType::DestructionKind DtorKind = 4627 Field->getType().isDestructedType()) { 4628 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 4629 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 4630 } 4631 } 4632 CGF.FinishFunction(); 4633 return DestructorFn; 4634 } 4635 4636 /// Emit a privates mapping function for correct handling of private and 4637 /// firstprivate variables. 4638 /// \code 4639 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 4640 /// **noalias priv1,..., <tyn> **noalias privn) { 4641 /// *priv1 = &.privates.priv1; 4642 /// ...; 4643 /// *privn = &.privates.privn; 4644 /// } 4645 /// \endcode 4646 static llvm::Value * 4647 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 4648 ArrayRef<const Expr *> PrivateVars, 4649 ArrayRef<const Expr *> FirstprivateVars, 4650 ArrayRef<const Expr *> LastprivateVars, 4651 QualType PrivatesQTy, 4652 ArrayRef<PrivateDataTy> Privates) { 4653 ASTContext &C = CGM.getContext(); 4654 FunctionArgList Args; 4655 ImplicitParamDecl TaskPrivatesArg( 4656 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4657 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 4658 ImplicitParamDecl::Other); 4659 Args.push_back(&TaskPrivatesArg); 4660 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 4661 unsigned Counter = 1; 4662 for (const Expr *E : PrivateVars) { 4663 Args.push_back(ImplicitParamDecl::Create( 4664 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4665 C.getPointerType(C.getPointerType(E->getType())) 4666 .withConst() 4667 .withRestrict(), 4668 ImplicitParamDecl::Other)); 4669 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4670 PrivateVarsPos[VD] = Counter; 4671 ++Counter; 4672 } 4673 for (const Expr *E : FirstprivateVars) { 4674 Args.push_back(ImplicitParamDecl::Create( 4675 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4676 C.getPointerType(C.getPointerType(E->getType())) 4677 .withConst() 4678 .withRestrict(), 4679 ImplicitParamDecl::Other)); 4680 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4681 PrivateVarsPos[VD] = Counter; 4682 ++Counter; 4683 } 4684 for (const Expr *E : LastprivateVars) { 4685 Args.push_back(ImplicitParamDecl::Create( 4686 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4687 C.getPointerType(C.getPointerType(E->getType())) 4688 .withConst() 4689 .withRestrict(), 4690 ImplicitParamDecl::Other)); 4691 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4692 PrivateVarsPos[VD] = Counter; 4693 ++Counter; 4694 } 4695 const 
auto &TaskPrivatesMapFnInfo = 4696 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4697 llvm::FunctionType *TaskPrivatesMapTy = 4698 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 4699 std::string Name = 4700 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 4701 auto *TaskPrivatesMap = llvm::Function::Create( 4702 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 4703 &CGM.getModule()); 4704 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 4705 TaskPrivatesMapFnInfo); 4706 if (CGM.getLangOpts().Optimize) { 4707 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 4708 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 4709 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 4710 } 4711 CodeGenFunction CGF(CGM); 4712 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 4713 TaskPrivatesMapFnInfo, Args, Loc, Loc); 4714 4715 // *privi = &.privates.privi; 4716 LValue Base = CGF.EmitLoadOfPointerLValue( 4717 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 4718 TaskPrivatesArg.getType()->castAs<PointerType>()); 4719 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 4720 Counter = 0; 4721 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 4722 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 4723 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4724 LValue RefLVal = 4725 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4726 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4727 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 4728 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 4729 ++Counter; 4730 } 4731 CGF.FinishFunction(); 4732 return TaskPrivatesMap; 4733 } 4734 4735 /// Emit initialization for private variables in task-based directives. 4736 static void emitPrivatesInit(CodeGenFunction &CGF, 4737 const OMPExecutableDirective &D, 4738 Address KmpTaskSharedsPtr, LValue TDBase, 4739 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4740 QualType SharedsTy, QualType SharedsPtrTy, 4741 const OMPTaskDataTy &Data, 4742 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 4743 ASTContext &C = CGF.getContext(); 4744 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4745 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 4746 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 4747 ? OMPD_taskloop 4748 : OMPD_task; 4749 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 4750 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 4751 LValue SrcBase; 4752 bool IsTargetTask = 4753 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 4754 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 4755 // For target-based directives skip 3 firstprivate arrays BasePointersArray, 4756 // PointersArray and SizesArray. The original variables for these arrays are 4757 // not captured and we get their addresses explicitly. 
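// SrcBase (the shareds block initial values are read from) is therefore only
// materialized when there is something to read: always for regular tasks
// with firstprivates, and only when the shareds pointer is valid for
// target-based tasks (a summary of the condition below).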
4758 if ((!IsTargetTask && !Data.FirstprivateVars.empty()) || 4759 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 4760 SrcBase = CGF.MakeAddrLValue( 4761 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4762 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 4763 SharedsTy); 4764 } 4765 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 4766 for (const PrivateDataTy &Pair : Privates) { 4767 const VarDecl *VD = Pair.second.PrivateCopy; 4768 const Expr *Init = VD->getAnyInitializer(); 4769 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 4770 !CGF.isTrivialInitializer(Init)))) { 4771 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 4772 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 4773 const VarDecl *OriginalVD = Pair.second.Original; 4774 // Check if the variable is the target-based BasePointersArray, 4775 // PointersArray or SizesArray. 4776 LValue SharedRefLValue; 4777 QualType Type = PrivateLValue.getType(); 4778 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 4779 if (IsTargetTask && !SharedField) { 4780 assert(isa<ImplicitParamDecl>(OriginalVD) && 4781 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 4782 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4783 ->getNumParams() == 0 && 4784 isa<TranslationUnitDecl>( 4785 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4786 ->getDeclContext()) && 4787 "Expected artificial target data variable."); 4788 SharedRefLValue = 4789 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 4790 } else { 4791 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 4792 SharedRefLValue = CGF.MakeAddrLValue( 4793 Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), 4794 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 4795 SharedRefLValue.getTBAAInfo()); 4796 } 4797 if (Type->isArrayType()) { 4798 // Initialize firstprivate array. 4799 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 4800 // Perform simple memcpy. 4801 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 4802 } else { 4803 // Initialize firstprivate array using element-by-element 4804 // initialization. 4805 CGF.EmitOMPAggregateAssign( 4806 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, 4807 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 4808 Address SrcElement) { 4809 // Clean up any temporaries needed by the initialization. 4810 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4811 InitScope.addPrivate( 4812 Elem, [SrcElement]() -> Address { return SrcElement; }); 4813 (void)InitScope.Privatize(); 4814 // Emit initialization for single element. 4815 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 4816 CGF, &CapturesInfo); 4817 CGF.EmitAnyExprToMem(Init, DestElement, 4818 Init->getType().getQualifiers(), 4819 /*IsInitializer=*/false); 4820 }); 4821 } 4822 } else { 4823 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4824 InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { 4825 return SharedRefLValue.getAddress(); 4826 }); 4827 (void)InitScope.Privatize(); 4828 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 4829 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 4830 /*capturedByInit=*/false); 4831 } 4832 } else { 4833 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 4834 } 4835 } 4836 ++FI; 4837 } 4838 } 4839 4840 /// Check if duplication function is required for taskloops. 
4841 static bool checkInitIsRequired(CodeGenFunction &CGF, 4842 ArrayRef<PrivateDataTy> Privates) { 4843 bool InitRequired = false; 4844 for (const PrivateDataTy &Pair : Privates) { 4845 const VarDecl *VD = Pair.second.PrivateCopy; 4846 const Expr *Init = VD->getAnyInitializer(); 4847 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4848 !CGF.isTrivialInitializer(Init)); 4849 if (InitRequired) 4850 break; 4851 } 4852 return InitRequired; 4853 } 4854 4855 4856 /// Emit task_dup function (for initialization of 4857 /// private/firstprivate/lastprivate vars and last_iter flag) 4858 /// \code 4859 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4860 /// lastpriv) { 4861 /// // setup lastprivate flag 4862 /// task_dst->last = lastpriv; 4863 /// // could be constructor calls here... 4864 /// } 4865 /// \endcode 4866 static llvm::Value * 4867 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4868 const OMPExecutableDirective &D, 4869 QualType KmpTaskTWithPrivatesPtrQTy, 4870 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4871 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4872 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4873 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4874 ASTContext &C = CGM.getContext(); 4875 FunctionArgList Args; 4876 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4877 KmpTaskTWithPrivatesPtrQTy, 4878 ImplicitParamDecl::Other); 4879 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4880 KmpTaskTWithPrivatesPtrQTy, 4881 ImplicitParamDecl::Other); 4882 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4883 ImplicitParamDecl::Other); 4884 Args.push_back(&DstArg); 4885 Args.push_back(&SrcArg); 4886 Args.push_back(&LastprivArg); 4887 const auto &TaskDupFnInfo = 4888 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4889 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 4890 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4891 auto *TaskDup = llvm::Function::Create( 4892 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4893 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4894 TaskDup->setDoesNotRecurse(); 4895 CodeGenFunction CGF(CGM); 4896 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4897 Loc); 4898 4899 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4900 CGF.GetAddrOfLocalVar(&DstArg), 4901 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4902 // task_dst->liter = lastpriv; 4903 if (WithLastIter) { 4904 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4905 LValue Base = CGF.EmitLValueForField( 4906 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4907 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4908 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4909 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4910 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4911 } 4912 4913 // Emit initial values for private copies (if any). 
4914 assert(!Privates.empty()); 4915 Address KmpTaskSharedsPtr = Address::invalid(); 4916 if (!Data.FirstprivateVars.empty()) { 4917 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4918 CGF.GetAddrOfLocalVar(&SrcArg), 4919 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4920 LValue Base = CGF.EmitLValueForField( 4921 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4922 KmpTaskSharedsPtr = Address( 4923 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4924 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4925 KmpTaskTShareds)), 4926 Loc), 4927 CGF.getNaturalTypeAlignment(SharedsTy)); 4928 } 4929 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4930 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4931 CGF.FinishFunction(); 4932 return TaskDup; 4933 } 4934 4935 /// Checks if destructor function is required to be generated. 4936 /// \return true if cleanups are required, false otherwise. 4937 static bool 4938 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 4939 bool NeedsCleanup = false; 4940 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4941 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 4942 for (const FieldDecl *FD : PrivateRD->fields()) { 4943 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 4944 if (NeedsCleanup) 4945 break; 4946 } 4947 return NeedsCleanup; 4948 } 4949 4950 CGOpenMPRuntime::TaskResultTy 4951 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4952 const OMPExecutableDirective &D, 4953 llvm::Function *TaskFunction, QualType SharedsTy, 4954 Address Shareds, const OMPTaskDataTy &Data) { 4955 ASTContext &C = CGM.getContext(); 4956 llvm::SmallVector<PrivateDataTy, 4> Privates; 4957 // Aggregate privates and sort them by the alignment. 4958 auto I = Data.PrivateCopies.begin(); 4959 for (const Expr *E : Data.PrivateVars) { 4960 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4961 Privates.emplace_back( 4962 C.getDeclAlign(VD), 4963 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4964 /*PrivateElemInit=*/nullptr)); 4965 ++I; 4966 } 4967 I = Data.FirstprivateCopies.begin(); 4968 auto IElemInitRef = Data.FirstprivateInits.begin(); 4969 for (const Expr *E : Data.FirstprivateVars) { 4970 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4971 Privates.emplace_back( 4972 C.getDeclAlign(VD), 4973 PrivateHelpersTy( 4974 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4975 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4976 ++I; 4977 ++IElemInitRef; 4978 } 4979 I = Data.LastprivateCopies.begin(); 4980 for (const Expr *E : Data.LastprivateVars) { 4981 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4982 Privates.emplace_back( 4983 C.getDeclAlign(VD), 4984 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4985 /*PrivateElemInit=*/nullptr)); 4986 ++I; 4987 } 4988 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { 4989 return L.first > R.first; 4990 }); 4991 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4992 // Build type kmp_routine_entry_t (if not built yet). 4993 emitKmpRoutineEntryT(KmpInt32Ty); 4994 // Build type kmp_task_t (if not built yet). 
4995 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4996 if (SavedKmpTaskloopTQTy.isNull()) { 4997 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4998 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4999 } 5000 KmpTaskTQTy = SavedKmpTaskloopTQTy; 5001 } else { 5002 assert((D.getDirectiveKind() == OMPD_task || 5003 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 5004 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 5005 "Expected taskloop, task or target directive"); 5006 if (SavedKmpTaskTQTy.isNull()) { 5007 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 5008 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 5009 } 5010 KmpTaskTQTy = SavedKmpTaskTQTy; 5011 } 5012 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 5013 // Build particular struct kmp_task_t for the given task. 5014 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 5015 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 5016 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 5017 QualType KmpTaskTWithPrivatesPtrQTy = 5018 C.getPointerType(KmpTaskTWithPrivatesQTy); 5019 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 5020 llvm::Type *KmpTaskTWithPrivatesPtrTy = 5021 KmpTaskTWithPrivatesTy->getPointerTo(); 5022 llvm::Value *KmpTaskTWithPrivatesTySize = 5023 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 5024 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 5025 5026 // Emit initial values for private copies (if any). 5027 llvm::Value *TaskPrivatesMap = nullptr; 5028 llvm::Type *TaskPrivatesMapTy = 5029 std::next(TaskFunction->arg_begin(), 3)->getType(); 5030 if (!Privates.empty()) { 5031 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 5032 TaskPrivatesMap = emitTaskPrivateMappingFunction( 5033 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 5034 FI->getType(), Privates); 5035 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5036 TaskPrivatesMap, TaskPrivatesMapTy); 5037 } else { 5038 TaskPrivatesMap = llvm::ConstantPointerNull::get( 5039 cast<llvm::PointerType>(TaskPrivatesMapTy)); 5040 } 5041 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 5042 // kmp_task_t *tt); 5043 llvm::Function *TaskEntry = emitProxyTaskFunction( 5044 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5045 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 5046 TaskPrivatesMap); 5047 5048 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 5049 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 5050 // kmp_routine_entry_t *task_entry); 5051 // Task flags. Format is taken from 5052 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 5053 // description of kmp_tasking_flags struct. 5054 enum { 5055 TiedFlag = 0x1, 5056 FinalFlag = 0x2, 5057 DestructorsFlag = 0x8, 5058 PriorityFlag = 0x20 5059 }; 5060 unsigned Flags = Data.Tied ? TiedFlag : 0; 5061 bool NeedsCleanup = false; 5062 if (!Privates.empty()) { 5063 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 5064 if (NeedsCleanup) 5065 Flags = Flags | DestructorsFlag; 5066 } 5067 if (Data.Priority.getInt()) 5068 Flags = Flags | PriorityFlag; 5069 llvm::Value *TaskFlags = 5070 Data.Final.getPointer() 5071 ? 
CGF.Builder.CreateSelect(Data.Final.getPointer(), 5072 CGF.Builder.getInt32(FinalFlag), 5073 CGF.Builder.getInt32(/*C=*/0)) 5074 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 5075 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 5076 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 5077 llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), 5078 getThreadID(CGF, Loc), TaskFlags, 5079 KmpTaskTWithPrivatesTySize, SharedsSize, 5080 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5081 TaskEntry, KmpRoutineEntryPtrTy)}; 5082 llvm::Value *NewTask = CGF.EmitRuntimeCall( 5083 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 5084 llvm::Value *NewTaskNewTaskTTy = 5085 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5086 NewTask, KmpTaskTWithPrivatesPtrTy); 5087 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 5088 KmpTaskTWithPrivatesQTy); 5089 LValue TDBase = 5090 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 5091 // Fill the data in the resulting kmp_task_t record. 5092 // Copy shareds if there are any. 5093 Address KmpTaskSharedsPtr = Address::invalid(); 5094 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 5095 KmpTaskSharedsPtr = 5096 Address(CGF.EmitLoadOfScalar( 5097 CGF.EmitLValueForField( 5098 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 5099 KmpTaskTShareds)), 5100 Loc), 5101 CGF.getNaturalTypeAlignment(SharedsTy)); 5102 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 5103 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 5104 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 5105 } 5106 // Emit initial values for private copies (if any). 5107 TaskResultTy Result; 5108 if (!Privates.empty()) { 5109 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 5110 SharedsTy, SharedsPtrTy, Data, Privates, 5111 /*ForDup=*/false); 5112 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 5113 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 5114 Result.TaskDupFn = emitTaskDupFunction( 5115 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 5116 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 5117 /*WithLastIter=*/!Data.LastprivateVars.empty()); 5118 } 5119 } 5120 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 5121 enum { Priority = 0, Destructors = 1 }; 5122 // Provide pointer to function with destructors for privates. 5123 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 5124 const RecordDecl *KmpCmplrdataUD = 5125 (*FI)->getType()->getAsUnionType()->getDecl(); 5126 if (NeedsCleanup) { 5127 llvm::Value *DestructorFn = emitDestructorsFunction( 5128 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5129 KmpTaskTWithPrivatesQTy); 5130 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 5131 LValue DestructorsLV = CGF.EmitLValueForField( 5132 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 5133 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5134 DestructorFn, KmpRoutineEntryPtrTy), 5135 DestructorsLV); 5136 } 5137 // Set priority. 
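// The priority value goes into the Priority member of the data2
// kmp_cmplrdata_t union, mirroring how the destructors pointer was stored
// into data1 above.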
5138 if (Data.Priority.getInt()) { 5139 LValue Data2LV = CGF.EmitLValueForField( 5140 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 5141 LValue PriorityLV = CGF.EmitLValueForField( 5142 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 5143 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 5144 } 5145 Result.NewTask = NewTask; 5146 Result.TaskEntry = TaskEntry; 5147 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 5148 Result.TDBase = TDBase; 5149 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 5150 return Result; 5151 } 5152 5153 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5154 const OMPExecutableDirective &D, 5155 llvm::Function *TaskFunction, 5156 QualType SharedsTy, Address Shareds, 5157 const Expr *IfCond, 5158 const OMPTaskDataTy &Data) { 5159 if (!CGF.HaveInsertPoint()) 5160 return; 5161 5162 TaskResultTy Result = 5163 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5164 llvm::Value *NewTask = Result.NewTask; 5165 llvm::Function *TaskEntry = Result.TaskEntry; 5166 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5167 LValue TDBase = Result.TDBase; 5168 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5169 ASTContext &C = CGM.getContext(); 5170 // Process list of dependences. 5171 Address DependenciesArray = Address::invalid(); 5172 unsigned NumDependencies = Data.Dependences.size(); 5173 if (NumDependencies) { 5174 // Dependence kind for RTL. 5175 enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 }; 5176 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 5177 RecordDecl *KmpDependInfoRD; 5178 QualType FlagsTy = 5179 C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 5180 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5181 if (KmpDependInfoTy.isNull()) { 5182 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 5183 KmpDependInfoRD->startDefinition(); 5184 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 5185 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 5186 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 5187 KmpDependInfoRD->completeDefinition(); 5188 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 5189 } else { 5190 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5191 } 5192 // Define type kmp_depend_info[<Dependences.size()>]; 5193 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 5194 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), 5195 ArrayType::Normal, /*IndexTypeQuals=*/0); 5196 // kmp_depend_info[<Dependences.size()>] deps; 5197 DependenciesArray = 5198 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 5199 for (unsigned I = 0; I < NumDependencies; ++I) { 5200 const Expr *E = Data.Dependences[I].second; 5201 LValue Addr = CGF.EmitLValue(E); 5202 llvm::Value *Size; 5203 QualType Ty = E->getType(); 5204 if (const auto *ASE = 5205 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 5206 LValue UpAddrLVal = 5207 CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false); 5208 llvm::Value *UpAddr = 5209 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); 5210 llvm::Value *LowIntPtr = 5211 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); 5212 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 5213 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 5214 } else { 5215 Size = CGF.getTypeSize(Ty); 5216 } 5217 LValue Base = CGF.MakeAddrLValue( 5218 
CGF.Builder.CreateConstArrayGEP(DependenciesArray, I), 5219 KmpDependInfoTy); 5220 // deps[i].base_addr = &<Dependences[i].second>; 5221 LValue BaseAddrLVal = CGF.EmitLValueForField( 5222 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5223 CGF.EmitStoreOfScalar( 5224 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), 5225 BaseAddrLVal); 5226 // deps[i].len = sizeof(<Dependences[i].second>); 5227 LValue LenLVal = CGF.EmitLValueForField( 5228 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 5229 CGF.EmitStoreOfScalar(Size, LenLVal); 5230 // deps[i].flags = <Dependences[i].first>; 5231 RTLDependenceKindTy DepKind; 5232 switch (Data.Dependences[I].first) { 5233 case OMPC_DEPEND_in: 5234 DepKind = DepIn; 5235 break; 5236 // Out and InOut dependencies must use the same code. 5237 case OMPC_DEPEND_out: 5238 case OMPC_DEPEND_inout: 5239 DepKind = DepInOut; 5240 break; 5241 case OMPC_DEPEND_mutexinoutset: 5242 DepKind = DepMutexInOutSet; 5243 break; 5244 case OMPC_DEPEND_source: 5245 case OMPC_DEPEND_sink: 5246 case OMPC_DEPEND_unknown: 5247 llvm_unreachable("Unknown task dependence type"); 5248 } 5249 LValue FlagsLVal = CGF.EmitLValueForField( 5250 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5251 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5252 FlagsLVal); 5253 } 5254 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5255 CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy); 5256 } 5257 5258 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5259 // libcall. 5260 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5261 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5262 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5263 // list is not empty. 5264 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5265 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5266 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5267 llvm::Value *DepTaskArgs[7]; 5268 if (NumDependencies) { 5269 DepTaskArgs[0] = UpLoc; 5270 DepTaskArgs[1] = ThreadID; 5271 DepTaskArgs[2] = NewTask; 5272 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); 5273 DepTaskArgs[4] = DependenciesArray.getPointer(); 5274 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5275 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5276 } 5277 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies, 5278 &TaskArgs, 5279 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5280 if (!Data.Tied) { 5281 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5282 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5283 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5284 } 5285 if (NumDependencies) { 5286 CGF.EmitRuntimeCall( 5287 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); 5288 } else { 5289 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 5290 TaskArgs); 5291 } 5292 // Check if parent region is untied and build return for untied task. 5293 if (auto *Region = 5294 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5295 Region->emitUntiedSwitch(CGF); 5296 }; 5297 5298 llvm::Value *DepWaitTaskArgs[6]; 5299 if (NumDependencies) { 5300 DepWaitTaskArgs[0] = UpLoc; 5301 DepWaitTaskArgs[1] = ThreadID; 5302 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); 5303 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5304
DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5305 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5306 } 5307 auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 5308 NumDependencies, &DepWaitTaskArgs, 5309 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5310 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5311 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5312 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5313 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5314 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5315 // is specified. 5316 if (NumDependencies) 5317 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), 5318 DepWaitTaskArgs); 5319 // Call proxy_task_entry(gtid, new_task); 5320 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5321 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5322 Action.Enter(CGF); 5323 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5324 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5325 OutlinedFnArgs); 5326 }; 5327 5328 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5329 // kmp_task_t *new_task); 5330 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5331 // kmp_task_t *new_task); 5332 RegionCodeGenTy RCG(CodeGen); 5333 CommonActionTy Action( 5334 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, 5335 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); 5336 RCG.setAction(Action); 5337 RCG(CGF); 5338 }; 5339 5340 if (IfCond) { 5341 emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5342 } else { 5343 RegionCodeGenTy ThenRCG(ThenCodeGen); 5344 ThenRCG(CGF); 5345 } 5346 } 5347 5348 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5349 const OMPLoopDirective &D, 5350 llvm::Function *TaskFunction, 5351 QualType SharedsTy, Address Shareds, 5352 const Expr *IfCond, 5353 const OMPTaskDataTy &Data) { 5354 if (!CGF.HaveInsertPoint()) 5355 return; 5356 TaskResultTy Result = 5357 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5358 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5359 // libcall. 
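// In outline (illustrative): emitTaskInit() has already called
// __kmpc_omp_task_alloc(); below we store lb/ub/st and the reductions
// pointer directly into the allocated kmp_task_t and then hand the task to
// __kmpc_taskloop(), which partitions the iteration space into tasks.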
5360 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5361 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5362 // sched, kmp_uint64 grainsize, void *task_dup); 5363 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5364 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5365 llvm::Value *IfVal; 5366 if (IfCond) { 5367 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5368 /*isSigned=*/true); 5369 } else { 5370 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5371 } 5372 5373 LValue LBLVal = CGF.EmitLValueForField( 5374 Result.TDBase, 5375 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5376 const auto *LBVar = 5377 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5378 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), 5379 /*IsInitializer=*/true); 5380 LValue UBLVal = CGF.EmitLValueForField( 5381 Result.TDBase, 5382 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5383 const auto *UBVar = 5384 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5385 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), 5386 /*IsInitializer=*/true); 5387 LValue StLVal = CGF.EmitLValueForField( 5388 Result.TDBase, 5389 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5390 const auto *StVar = 5391 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5392 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), 5393 /*IsInitializer=*/true); 5394 // Store reductions address. 5395 LValue RedLVal = CGF.EmitLValueForField( 5396 Result.TDBase, 5397 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5398 if (Data.Reductions) { 5399 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5400 } else { 5401 CGF.EmitNullInitialization(RedLVal.getAddress(), 5402 CGF.getContext().VoidPtrTy); 5403 } 5404 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5405 llvm::Value *TaskArgs[] = { 5406 UpLoc, 5407 ThreadID, 5408 Result.NewTask, 5409 IfVal, 5410 LBLVal.getPointer(), 5411 UBLVal.getPointer(), 5412 CGF.EmitLoadOfScalar(StLVal, Loc), 5413 llvm::ConstantInt::getSigned( 5414 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5415 llvm::ConstantInt::getSigned( 5416 CGF.IntTy, Data.Schedule.getPointer() 5417 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5418 : NoSchedule), 5419 Data.Schedule.getPointer() 5420 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5421 /*isSigned=*/false) 5422 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5423 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5424 Result.TaskDupFn, CGF.VoidPtrTy) 5425 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5426 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); 5427 } 5428 5429 /// Emit reduction operation for each element of array (required for 5430 /// array sections) LHS op = RHS. 5431 /// \param Type Type of array. 5432 /// \param LHSVar Variable on the left side of the reduction operation 5433 /// (references element of array in original variable). 5434 /// \param RHSVar Variable on the right side of the reduction operation 5435 /// (references element of array in original variable). 5436 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5437 /// RHSVar. 
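/// The emitted loop is logically (illustrative):
/// \code
/// for (size_t i = 0; i < NumElements; ++i)
///   LHS[i] = RedOp(LHS[i], RHS[i]);
/// \endcode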
5438 static void EmitOMPAggregateReduction( 5439 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5440 const VarDecl *RHSVar, 5441 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5442 const Expr *, const Expr *)> &RedOpGen, 5443 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5444 const Expr *UpExpr = nullptr) { 5445 // Perform the reduction element by element. 5446 QualType ElementTy; 5447 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5448 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5449 5450 // Drill down to the base element type on both arrays. 5451 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5452 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5453 5454 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5455 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5456 // Cast from pointer to array type to pointer to single element. 5457 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5458 // The basic structure here is a while-do loop. 5459 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5460 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5461 llvm::Value *IsEmpty = 5462 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5463 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5464 5465 // Enter the loop body, making that address the current address. 5466 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5467 CGF.EmitBlock(BodyBB); 5468 5469 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5470 5471 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5472 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5473 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5474 Address RHSElementCurrent = 5475 Address(RHSElementPHI, 5476 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5477 5478 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5479 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5480 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5481 Address LHSElementCurrent = 5482 Address(LHSElementPHI, 5483 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5484 5485 // Emit the reduction operation for the current element. 5486 CodeGenFunction::OMPPrivateScope Scope(CGF); 5487 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5488 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5489 Scope.Privatize(); 5490 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5491 Scope.ForceCleanup(); 5492 5493 // Shift the address forward by one element. 5494 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5495 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5496 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5497 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5498 // Check whether we've reached the end. 5499 llvm::Value *Done = 5500 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5501 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5502 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5503 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5504 5505 // Done. 5506 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5507 } 5508 5509 /// Emit reduction combiner. If the combiner is a simple expression, emit it as 5510 /// is; otherwise treat it as the combiner of a UDR decl and emit it as a call 5511 /// to the UDR combiner function.
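/// For a UDR the emitted call is roughly (illustrative):
/// \code
/// <udr-combiner-fn>(&lhs, &rhs); // combiner from the corresponding
///                                // #pragma omp declare reduction
/// \endcode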
5512 static void emitReductionCombiner(CodeGenFunction &CGF, 5513 const Expr *ReductionOp) { 5514 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5515 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5516 if (const auto *DRE = 5517 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5518 if (const auto *DRD = 5519 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5520 std::pair<llvm::Function *, llvm::Function *> Reduction = 5521 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5522 RValue Func = RValue::get(Reduction.first); 5523 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5524 CGF.EmitIgnoredExpr(ReductionOp); 5525 return; 5526 } 5527 CGF.EmitIgnoredExpr(ReductionOp); 5528 } 5529 5530 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5531 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5532 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5533 ArrayRef<const Expr *> ReductionOps) { 5534 ASTContext &C = CGM.getContext(); 5535 5536 // void reduction_func(void *LHSArg, void *RHSArg); 5537 FunctionArgList Args; 5538 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5539 ImplicitParamDecl::Other); 5540 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5541 ImplicitParamDecl::Other); 5542 Args.push_back(&LHSArg); 5543 Args.push_back(&RHSArg); 5544 const auto &CGFI = 5545 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5546 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5547 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5548 llvm::GlobalValue::InternalLinkage, Name, 5549 &CGM.getModule()); 5550 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5551 Fn->setDoesNotRecurse(); 5552 CodeGenFunction CGF(CGM); 5553 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5554 5555 // Dst = (void*[n])(LHSArg); 5556 // Src = (void*[n])(RHSArg); 5557 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5558 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5559 ArgsType), CGF.getPointerAlign()); 5560 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5561 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5562 ArgsType), CGF.getPointerAlign()); 5563 5564 // ... 5565 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5566 // ... 5567 CodeGenFunction::OMPPrivateScope Scope(CGF); 5568 auto IPriv = Privates.begin(); 5569 unsigned Idx = 0; 5570 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5571 const auto *RHSVar = 5572 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5573 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5574 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5575 }); 5576 const auto *LHSVar = 5577 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5578 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5579 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5580 }); 5581 QualType PrivTy = (*IPriv)->getType(); 5582 if (PrivTy->isVariablyModifiedType()) { 5583 // Get array size and emit VLA type. 
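// VLA sizes travel as extra void* slots interleaved into the same argument
// array (stored by the matching code in emitReduction() below), so bump the
// index to the size slot before loading it.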
5584 ++Idx; 5585 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5586 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5587 const VariableArrayType *VLA = 5588 CGF.getContext().getAsVariableArrayType(PrivTy); 5589 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5590 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5591 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5592 CGF.EmitVariablyModifiedType(PrivTy); 5593 } 5594 } 5595 Scope.Privatize(); 5596 IPriv = Privates.begin(); 5597 auto ILHS = LHSExprs.begin(); 5598 auto IRHS = RHSExprs.begin(); 5599 for (const Expr *E : ReductionOps) { 5600 if ((*IPriv)->getType()->isArrayType()) { 5601 // Emit reduction for array section. 5602 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5603 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5604 EmitOMPAggregateReduction( 5605 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5606 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5607 emitReductionCombiner(CGF, E); 5608 }); 5609 } else { 5610 // Emit reduction for array subscript or single variable. 5611 emitReductionCombiner(CGF, E); 5612 } 5613 ++IPriv; 5614 ++ILHS; 5615 ++IRHS; 5616 } 5617 Scope.ForceCleanup(); 5618 CGF.FinishFunction(); 5619 return Fn; 5620 } 5621 5622 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5623 const Expr *ReductionOp, 5624 const Expr *PrivateRef, 5625 const DeclRefExpr *LHS, 5626 const DeclRefExpr *RHS) { 5627 if (PrivateRef->getType()->isArrayType()) { 5628 // Emit reduction for array section. 5629 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5630 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5631 EmitOMPAggregateReduction( 5632 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5633 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5634 emitReductionCombiner(CGF, ReductionOp); 5635 }); 5636 } else { 5637 // Emit reduction for array subscript or single variable. 5638 emitReductionCombiner(CGF, ReductionOp); 5639 } 5640 } 5641 5642 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5643 ArrayRef<const Expr *> Privates, 5644 ArrayRef<const Expr *> LHSExprs, 5645 ArrayRef<const Expr *> RHSExprs, 5646 ArrayRef<const Expr *> ReductionOps, 5647 ReductionOptionsTy Options) { 5648 if (!CGF.HaveInsertPoint()) 5649 return; 5650 5651 bool WithNowait = Options.WithNowait; 5652 bool SimpleReduction = Options.SimpleReduction; 5653 5654 // Next code should be emitted for reduction: 5655 // 5656 // static kmp_critical_name lock = { 0 }; 5657 // 5658 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5659 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5660 // ... 5661 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5662 // *(Type<n>-1*)rhs[<n>-1]); 5663 // } 5664 // 5665 // ... 5666 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5667 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5668 // RedList, reduce_func, &<lock>)) { 5669 // case 1: 5670 // ... 5671 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5672 // ... 5673 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5674 // break; 5675 // case 2: 5676 // ... 5677 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5678 // ... 
5679 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5680 // break; 5681 // default:; 5682 // } 5683 // 5684 // if SimpleReduction is true, only the next code is generated: 5685 // ... 5686 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5687 // ... 5688 5689 ASTContext &C = CGM.getContext(); 5690 5691 if (SimpleReduction) { 5692 CodeGenFunction::RunCleanupsScope Scope(CGF); 5693 auto IPriv = Privates.begin(); 5694 auto ILHS = LHSExprs.begin(); 5695 auto IRHS = RHSExprs.begin(); 5696 for (const Expr *E : ReductionOps) { 5697 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5698 cast<DeclRefExpr>(*IRHS)); 5699 ++IPriv; 5700 ++ILHS; 5701 ++IRHS; 5702 } 5703 return; 5704 } 5705 5706 // 1. Build a list of reduction variables. 5707 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5708 auto Size = RHSExprs.size(); 5709 for (const Expr *E : Privates) { 5710 if (E->getType()->isVariablyModifiedType()) 5711 // Reserve place for array size. 5712 ++Size; 5713 } 5714 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5715 QualType ReductionArrayTy = 5716 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 5717 /*IndexTypeQuals=*/0); 5718 Address ReductionList = 5719 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5720 auto IPriv = Privates.begin(); 5721 unsigned Idx = 0; 5722 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5723 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5724 CGF.Builder.CreateStore( 5725 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5726 CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), 5727 Elem); 5728 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5729 // Store array size. 5730 ++Idx; 5731 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5732 llvm::Value *Size = CGF.Builder.CreateIntCast( 5733 CGF.getVLASize( 5734 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5735 .NumElts, 5736 CGF.SizeTy, /*isSigned=*/false); 5737 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5738 Elem); 5739 } 5740 } 5741 5742 // 2. Emit reduce_func(). 5743 llvm::Function *ReductionFn = emitReductionFunction( 5744 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5745 LHSExprs, RHSExprs, ReductionOps); 5746 5747 // 3. Create static kmp_critical_name lock = { 0 }; 5748 std::string Name = getName({"reduction"}); 5749 llvm::Value *Lock = getCriticalRegionLock(Name); 5750 5751 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5752 // RedList, reduce_func, &<lock>); 5753 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5754 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5755 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5756 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5757 ReductionList.getPointer(), CGF.VoidPtrTy); 5758 llvm::Value *Args[] = { 5759 IdentTLoc, // ident_t *<loc> 5760 ThreadId, // i32 <gtid> 5761 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5762 ReductionArrayTySize, // size_type sizeof(RedList) 5763 RL, // void *RedList 5764 ReductionFn, // void (*) (void *, void *) <reduce_func> 5765 Lock // kmp_critical_name *&<lock> 5766 }; 5767 llvm::Value *Res = CGF.EmitRuntimeCall( 5768 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 5769 : OMPRTL__kmpc_reduce), 5770 Args); 5771 5772 // 5. 
Build switch(res) 5773 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5774 llvm::SwitchInst *SwInst = 5775 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5776 5777 // 6. Build case 1: 5778 // ... 5779 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5780 // ... 5781 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5782 // break; 5783 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5784 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5785 CGF.EmitBlock(Case1BB); 5786 5787 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5788 llvm::Value *EndArgs[] = { 5789 IdentTLoc, // ident_t *<loc> 5790 ThreadId, // i32 <gtid> 5791 Lock // kmp_critical_name *&<lock> 5792 }; 5793 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5794 CodeGenFunction &CGF, PrePostActionTy &Action) { 5795 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5796 auto IPriv = Privates.begin(); 5797 auto ILHS = LHSExprs.begin(); 5798 auto IRHS = RHSExprs.begin(); 5799 for (const Expr *E : ReductionOps) { 5800 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5801 cast<DeclRefExpr>(*IRHS)); 5802 ++IPriv; 5803 ++ILHS; 5804 ++IRHS; 5805 } 5806 }; 5807 RegionCodeGenTy RCG(CodeGen); 5808 CommonActionTy Action( 5809 nullptr, llvm::None, 5810 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait 5811 : OMPRTL__kmpc_end_reduce), 5812 EndArgs); 5813 RCG.setAction(Action); 5814 RCG(CGF); 5815 5816 CGF.EmitBranch(DefaultBB); 5817 5818 // 7. Build case 2: 5819 // ... 5820 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5821 // ... 5822 // break; 5823 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5824 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5825 CGF.EmitBlock(Case2BB); 5826 5827 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5828 CodeGenFunction &CGF, PrePostActionTy &Action) { 5829 auto ILHS = LHSExprs.begin(); 5830 auto IRHS = RHSExprs.begin(); 5831 auto IPriv = Privates.begin(); 5832 for (const Expr *E : ReductionOps) { 5833 const Expr *XExpr = nullptr; 5834 const Expr *EExpr = nullptr; 5835 const Expr *UpExpr = nullptr; 5836 BinaryOperatorKind BO = BO_Comma; 5837 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5838 if (BO->getOpcode() == BO_Assign) { 5839 XExpr = BO->getLHS(); 5840 UpExpr = BO->getRHS(); 5841 } 5842 } 5843 // Try to emit update expression as a simple atomic. 5844 const Expr *RHSExpr = UpExpr; 5845 if (RHSExpr) { 5846 // Analyze RHS part of the whole expression. 5847 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5848 RHSExpr->IgnoreParenImpCasts())) { 5849 // If this is a conditional operator, analyze its condition for 5850 // min/max reduction operator. 
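        // E.g. (illustrative) 'reduction(min : x)' produces an update of the
        // form 'x = x < e ? x : e'; analyzing the condition 'x < e' below
        // recovers the comparison opcode and the 'e' operand needed for the
        // atomic form.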
5851 RHSExpr = ACO->getCond(); 5852 } 5853 if (const auto *BORHS = 5854 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5855 EExpr = BORHS->getRHS(); 5856 BO = BORHS->getOpcode(); 5857 } 5858 } 5859 if (XExpr) { 5860 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5861 auto &&AtomicRedGen = [BO, VD, 5862 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5863 const Expr *EExpr, const Expr *UpExpr) { 5864 LValue X = CGF.EmitLValue(XExpr); 5865 RValue E; 5866 if (EExpr) 5867 E = CGF.EmitAnyExpr(EExpr); 5868 CGF.EmitOMPAtomicSimpleUpdateExpr( 5869 X, E, BO, /*IsXLHSInRHSPart=*/true, 5870 llvm::AtomicOrdering::Monotonic, Loc, 5871 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5872 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5873 PrivateScope.addPrivate( 5874 VD, [&CGF, VD, XRValue, Loc]() { 5875 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5876 CGF.emitOMPSimpleStore( 5877 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5878 VD->getType().getNonReferenceType(), Loc); 5879 return LHSTemp; 5880 }); 5881 (void)PrivateScope.Privatize(); 5882 return CGF.EmitAnyExpr(UpExpr); 5883 }); 5884 }; 5885 if ((*IPriv)->getType()->isArrayType()) { 5886 // Emit atomic reduction for array section. 5887 const auto *RHSVar = 5888 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5889 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5890 AtomicRedGen, XExpr, EExpr, UpExpr); 5891 } else { 5892 // Emit atomic reduction for array subscript or single variable. 5893 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5894 } 5895 } else { 5896 // Emit as a critical region. 5897 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5898 const Expr *, const Expr *) { 5899 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5900 std::string Name = RT.getName({"atomic_reduction"}); 5901 RT.emitCriticalRegion( 5902 CGF, Name, 5903 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5904 Action.Enter(CGF); 5905 emitReductionCombiner(CGF, E); 5906 }, 5907 Loc); 5908 }; 5909 if ((*IPriv)->getType()->isArrayType()) { 5910 const auto *LHSVar = 5911 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5912 const auto *RHSVar = 5913 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5914 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5915 CritRedGen); 5916 } else { 5917 CritRedGen(CGF, nullptr, nullptr, nullptr); 5918 } 5919 } 5920 ++ILHS; 5921 ++IRHS; 5922 ++IPriv; 5923 } 5924 }; 5925 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5926 if (!WithNowait) { 5927 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5928 llvm::Value *EndArgs[] = { 5929 IdentTLoc, // ident_t *<loc> 5930 ThreadId, // i32 <gtid> 5931 Lock // kmp_critical_name *&<lock> 5932 }; 5933 CommonActionTy Action(nullptr, llvm::None, 5934 createRuntimeFunction(OMPRTL__kmpc_end_reduce), 5935 EndArgs); 5936 AtomicRCG.setAction(Action); 5937 AtomicRCG(CGF); 5938 } else { 5939 AtomicRCG(CGF); 5940 } 5941 5942 CGF.EmitBranch(DefaultBB); 5943 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5944 } 5945 5946 /// Generates unique name for artificial threadprivate variables. 5947 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5948 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5949 const Expr *Ref) { 5950 SmallString<256> Buffer; 5951 llvm::raw_svector_ostream Out(Buffer); 5952 const clang::DeclRefExpr *DE; 5953 const VarDecl *D = ::getBaseDecl(Ref, DE); 5954 if (!D) 5955 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5956 D = D->getCanonicalDecl(); 5957 std::string Name = CGM.getOpenMPRuntime().getName( 5958 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5959 Out << Prefix << Name << "_" 5960 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5961 return Out.str(); 5962 } 5963 5964 /// Emits reduction initializer function: 5965 /// \code 5966 /// void @.red_init(void* %arg) { 5967 /// %0 = bitcast void* %arg to <type>* 5968 /// store <type> <init>, <type>* %0 5969 /// ret void 5970 /// } 5971 /// \endcode 5972 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5973 SourceLocation Loc, 5974 ReductionCodeGen &RCG, unsigned N) { 5975 ASTContext &C = CGM.getContext(); 5976 FunctionArgList Args; 5977 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5978 ImplicitParamDecl::Other); 5979 Args.emplace_back(&Param); 5980 const auto &FnInfo = 5981 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5982 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5983 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5984 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5985 Name, &CGM.getModule()); 5986 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5987 Fn->setDoesNotRecurse(); 5988 CodeGenFunction CGF(CGM); 5989 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5990 Address PrivateAddr = CGF.EmitLoadOfPointer( 5991 CGF.GetAddrOfLocalVar(&Param), 5992 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5993 llvm::Value *Size = nullptr; 5994 // If the size of the reduction item is non-constant, load it from global 5995 // threadprivate variable. 
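  // Mechanism sketch (illustrative): for a VLA item 'int a[n]',
  // emitTaskReductionFixups below stores the dynamic size into an artificial
  // threadprivate variable named via generateUniqueName(CGM,
  // "reduction_size", ...), and the generated initializer reloads that value
  // here to size the private copy.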
5996   if (RCG.getSizes(N).second) {
5997     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5998         CGF, CGM.getContext().getSizeType(),
5999         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6000     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6001                                 CGM.getContext().getSizeType(), Loc);
6002   }
6003   RCG.emitAggregateType(CGF, N, Size);
6004   LValue SharedLVal;
6005   // If the initializer uses the initializer from a declare reduction
6006   // construct, emit a pointer to the address of the original reduction item
6007   // (required by the reduction initializer).
6008   if (RCG.usesReductionInitializer(N)) {
6009     Address SharedAddr =
6010         CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6011             CGF, CGM.getContext().VoidPtrTy,
6012             generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6013     SharedAddr = CGF.EmitLoadOfPointer(
6014         SharedAddr,
6015         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
6016     SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
6017   } else {
6018     SharedLVal = CGF.MakeNaturalAlignAddrLValue(
6019         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
6020         CGM.getContext().VoidPtrTy);
6021   }
6022   // Emit the initializer:
6023   // %0 = bitcast void* %arg to <type>*
6024   // store <type> <init>, <type>* %0
6025   RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
6026                          [](CodeGenFunction &) { return false; });
6027   CGF.FinishFunction();
6028   return Fn;
6029 }
6030 
6031 /// Emits reduction combiner function:
6032 /// \code
6033 /// void @.red_comb(void* %arg0, void* %arg1) {
6034 ///   %lhs = bitcast void* %arg0 to <type>*
6035 ///   %rhs = bitcast void* %arg1 to <type>*
6036 ///   %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
6037 ///   store <type> %2, <type>* %lhs
6038 ///   ret void
6039 /// }
6040 /// \endcode
6041 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
6042                                            SourceLocation Loc,
6043                                            ReductionCodeGen &RCG, unsigned N,
6044                                            const Expr *ReductionOp,
6045                                            const Expr *LHS, const Expr *RHS,
6046                                            const Expr *PrivateRef) {
6047   ASTContext &C = CGM.getContext();
6048   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
6049   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
6050   FunctionArgList Args;
6051   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
6052                                C.VoidPtrTy, ImplicitParamDecl::Other);
6053   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6054                             ImplicitParamDecl::Other);
6055   Args.emplace_back(&ParamInOut);
6056   Args.emplace_back(&ParamIn);
6057   const auto &FnInfo =
6058       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6059   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6060   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
6061   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6062                                     Name, &CGM.getModule());
6063   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6064   Fn->setDoesNotRecurse();
6065   CodeGenFunction CGF(CGM);
6066   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6067   llvm::Value *Size = nullptr;
6068   // If the size of the reduction item is non-constant, load it from global
6069   // threadprivate variable.
6070 if (RCG.getSizes(N).second) { 6071 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6072 CGF, CGM.getContext().getSizeType(), 6073 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6074 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6075 CGM.getContext().getSizeType(), Loc); 6076 } 6077 RCG.emitAggregateType(CGF, N, Size); 6078 // Remap lhs and rhs variables to the addresses of the function arguments. 6079 // %lhs = bitcast void* %arg0 to <type>* 6080 // %rhs = bitcast void* %arg1 to <type>* 6081 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 6082 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 6083 // Pull out the pointer to the variable. 6084 Address PtrAddr = CGF.EmitLoadOfPointer( 6085 CGF.GetAddrOfLocalVar(&ParamInOut), 6086 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6087 return CGF.Builder.CreateElementBitCast( 6088 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 6089 }); 6090 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 6091 // Pull out the pointer to the variable. 6092 Address PtrAddr = CGF.EmitLoadOfPointer( 6093 CGF.GetAddrOfLocalVar(&ParamIn), 6094 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6095 return CGF.Builder.CreateElementBitCast( 6096 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 6097 }); 6098 PrivateScope.Privatize(); 6099 // Emit the combiner body: 6100 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 6101 // store <type> %2, <type>* %lhs 6102 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 6103 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 6104 cast<DeclRefExpr>(RHS)); 6105 CGF.FinishFunction(); 6106 return Fn; 6107 } 6108 6109 /// Emits reduction finalizer function: 6110 /// \code 6111 /// void @.red_fini(void* %arg) { 6112 /// %0 = bitcast void* %arg to <type>* 6113 /// <destroy>(<type>* %0) 6114 /// ret void 6115 /// } 6116 /// \endcode 6117 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 6118 SourceLocation Loc, 6119 ReductionCodeGen &RCG, unsigned N) { 6120 if (!RCG.needCleanups(N)) 6121 return nullptr; 6122 ASTContext &C = CGM.getContext(); 6123 FunctionArgList Args; 6124 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6125 ImplicitParamDecl::Other); 6126 Args.emplace_back(&Param); 6127 const auto &FnInfo = 6128 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6129 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6130 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6131 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6132 Name, &CGM.getModule()); 6133 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6134 Fn->setDoesNotRecurse(); 6135 CodeGenFunction CGF(CGM); 6136 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6137 Address PrivateAddr = CGF.EmitLoadOfPointer( 6138 CGF.GetAddrOfLocalVar(&Param), 6139 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6140 llvm::Value *Size = nullptr; 6141 // If the size of the reduction item is non-constant, load it from global 6142 // threadprivate variable. 
6143 if (RCG.getSizes(N).second) { 6144 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6145 CGF, CGM.getContext().getSizeType(), 6146 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6147 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6148 CGM.getContext().getSizeType(), Loc); 6149 } 6150 RCG.emitAggregateType(CGF, N, Size); 6151 // Emit the finalizer body: 6152 // <destroy>(<type>* %0) 6153 RCG.emitCleanups(CGF, N, PrivateAddr); 6154 CGF.FinishFunction(); 6155 return Fn; 6156 } 6157 6158 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6159 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6160 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6161 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6162 return nullptr; 6163 6164 // Build typedef struct: 6165 // kmp_task_red_input { 6166 // void *reduce_shar; // shared reduction item 6167 // size_t reduce_size; // size of data item 6168 // void *reduce_init; // data initialization routine 6169 // void *reduce_fini; // data finalization routine 6170 // void *reduce_comb; // data combiner routine 6171 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6172 // } kmp_task_red_input_t; 6173 ASTContext &C = CGM.getContext(); 6174 RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t"); 6175 RD->startDefinition(); 6176 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6177 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6178 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6179 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6180 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6181 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6182 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6183 RD->completeDefinition(); 6184 QualType RDType = C.getRecordType(RD); 6185 unsigned Size = Data.ReductionVars.size(); 6186 llvm::APInt ArraySize(/*numBits=*/64, Size); 6187 QualType ArrayRDType = C.getConstantArrayType( 6188 RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); 6189 // kmp_task_red_input_t .rd_input.[Size]; 6190 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6191 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, 6192 Data.ReductionOps); 6193 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6194 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6195 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6196 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6197 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6198 TaskRedInput.getPointer(), Idxs, 6199 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6200 ".rd_input.gep."); 6201 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6202 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6203 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6204 RCG.emitSharedLValue(CGF, Cnt); 6205 llvm::Value *CastedShared = 6206 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer()); 6207 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6208 RCG.emitAggregateType(CGF, Cnt); 6209 llvm::Value *SizeValInChars; 6210 llvm::Value *SizeVal; 6211 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6212 // We use delayed creation/initialization for VLAs, array sections and 6213 // custom reduction initializations. 
It is required because the runtime does not
6214     // provide a way to pass the sizes of VLAs/array sections to the initializer/
6215     // combiner/finalizer functions, and does not pass the pointer to the original
6216     // reduction item to the initializer. Instead, threadprivate global variables
6217     // are used to store these values and make them available to those functions.
6218     bool DelayedCreation = !!SizeVal;
6219     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6220                                                /*isSigned=*/false);
6221     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6222     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6223     // ElemLVal.reduce_init = init;
6224     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6225     llvm::Value *InitAddr =
6226         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6227     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6228     DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
6229     // ElemLVal.reduce_fini = fini;
6230     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6231     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6232     llvm::Value *FiniAddr = Fini
6233                                 ? CGF.EmitCastToVoidPtr(Fini)
6234                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6235     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6236     // ElemLVal.reduce_comb = comb;
6237     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6238     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6239         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6240         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6241     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6242     // ElemLVal.flags = 0;
6243     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6244     if (DelayedCreation) {
6245       CGF.EmitStoreOfScalar(
6246           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
6247           FlagsLVal);
6248     } else
6249       CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
6250   }
6251   // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
6252   // *data);
6253   llvm::Value *Args[] = {
6254       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6255                                 /*isSigned=*/true),
6256       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6257       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6258                                                       CGM.VoidPtrTy)};
6259   return CGF.EmitRuntimeCall(
6260       createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
6261 }
6262 
6263 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6264                                               SourceLocation Loc,
6265                                               ReductionCodeGen &RCG,
6266                                               unsigned N) {
6267   auto Sizes = RCG.getSizes(N);
6268   // Emit threadprivate global variable if the type is non-constant
6269   // (Sizes.second != nullptr).
6270   if (Sizes.second) {
6271     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6272                                                      /*isSigned=*/false);
6273     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6274         CGF, CGM.getContext().getSizeType(),
6275         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6276     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6277   }
6278   // Store the address of the original reduction item if a custom initializer is used.
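  // E.g. (illustrative) for '#pragma omp declare reduction(...
  // initializer(omp_priv = init(omp_orig)))', the generated init function
  // needs the address of the original item to evaluate 'omp_orig'; that
  // address is published here and reloaded in emitReduceInitFunction above.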
6279 if (RCG.usesReductionInitializer(N)) { 6280 Address SharedAddr = getAddrOfArtificialThreadPrivate( 6281 CGF, CGM.getContext().VoidPtrTy, 6282 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 6283 CGF.Builder.CreateStore( 6284 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6285 RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy), 6286 SharedAddr, /*IsVolatile=*/false); 6287 } 6288 } 6289 6290 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6291 SourceLocation Loc, 6292 llvm::Value *ReductionsPtr, 6293 LValue SharedLVal) { 6294 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6295 // *d); 6296 llvm::Value *Args[] = { 6297 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6298 /*isSigned=*/true), 6299 ReductionsPtr, 6300 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(), 6301 CGM.VoidPtrTy)}; 6302 return Address( 6303 CGF.EmitRuntimeCall( 6304 createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), 6305 SharedLVal.getAlignment()); 6306 } 6307 6308 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6309 SourceLocation Loc) { 6310 if (!CGF.HaveInsertPoint()) 6311 return; 6312 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6313 // global_tid); 6314 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 6315 // Ignore return result until untied tasks are supported. 6316 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 6317 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6318 Region->emitUntiedSwitch(CGF); 6319 } 6320 6321 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6322 OpenMPDirectiveKind InnerKind, 6323 const RegionCodeGenTy &CodeGen, 6324 bool HasCancel) { 6325 if (!CGF.HaveInsertPoint()) 6326 return; 6327 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 6328 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6329 } 6330 6331 namespace { 6332 enum RTCancelKind { 6333 CancelNoreq = 0, 6334 CancelParallel = 1, 6335 CancelLoop = 2, 6336 CancelSections = 3, 6337 CancelTaskgroup = 4 6338 }; 6339 } // anonymous namespace 6340 6341 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6342 RTCancelKind CancelKind = CancelNoreq; 6343 if (CancelRegion == OMPD_parallel) 6344 CancelKind = CancelParallel; 6345 else if (CancelRegion == OMPD_for) 6346 CancelKind = CancelLoop; 6347 else if (CancelRegion == OMPD_sections) 6348 CancelKind = CancelSections; 6349 else { 6350 assert(CancelRegion == OMPD_taskgroup); 6351 CancelKind = CancelTaskgroup; 6352 } 6353 return CancelKind; 6354 } 6355 6356 void CGOpenMPRuntime::emitCancellationPointCall( 6357 CodeGenFunction &CGF, SourceLocation Loc, 6358 OpenMPDirectiveKind CancelRegion) { 6359 if (!CGF.HaveInsertPoint()) 6360 return; 6361 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6362 // global_tid, kmp_int32 cncl_kind); 6363 if (auto *OMPRegionInfo = 6364 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6365 // For 'cancellation point taskgroup', the task region info may not have a 6366 // cancel. This may instead happen in another adjacent task. 6367 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6368 llvm::Value *Args[] = { 6369 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6370 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6371 // Ignore return result until untied tasks are supported. 
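      // For example (illustrative), '#pragma omp cancellation point sections'
      // passes CancelSections (3) as the cncl_kind argument; see
      // getCancellationKind above.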
6372 llvm::Value *Result = CGF.EmitRuntimeCall( 6373 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 6374 // if (__kmpc_cancellationpoint()) { 6375 // exit from construct; 6376 // } 6377 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6378 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6379 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6380 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6381 CGF.EmitBlock(ExitBB); 6382 // exit from construct; 6383 CodeGenFunction::JumpDest CancelDest = 6384 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6385 CGF.EmitBranchThroughCleanup(CancelDest); 6386 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6387 } 6388 } 6389 } 6390 6391 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6392 const Expr *IfCond, 6393 OpenMPDirectiveKind CancelRegion) { 6394 if (!CGF.HaveInsertPoint()) 6395 return; 6396 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6397 // kmp_int32 cncl_kind); 6398 if (auto *OMPRegionInfo = 6399 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6400 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 6401 PrePostActionTy &) { 6402 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6403 llvm::Value *Args[] = { 6404 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6405 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6406 // Ignore return result until untied tasks are supported. 6407 llvm::Value *Result = CGF.EmitRuntimeCall( 6408 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 6409 // if (__kmpc_cancel()) { 6410 // exit from construct; 6411 // } 6412 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6413 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6414 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6415 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6416 CGF.EmitBlock(ExitBB); 6417 // exit from construct; 6418 CodeGenFunction::JumpDest CancelDest = 6419 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6420 CGF.EmitBranchThroughCleanup(CancelDest); 6421 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6422 }; 6423 if (IfCond) { 6424 emitOMPIfClause(CGF, IfCond, ThenGen, 6425 [](CodeGenFunction &, PrePostActionTy &) {}); 6426 } else { 6427 RegionCodeGenTy ThenRCG(ThenGen); 6428 ThenRCG(CGF); 6429 } 6430 } 6431 } 6432 6433 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6434 const OMPExecutableDirective &D, StringRef ParentName, 6435 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6436 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6437 assert(!ParentName.empty() && "Invalid target region parent name!"); 6438 HasEmittedTargetRegion = true; 6439 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6440 IsOffloadEntry, CodeGen); 6441 } 6442 6443 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6444 const OMPExecutableDirective &D, StringRef ParentName, 6445 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6446 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6447 // Create a unique name for the entry function using the source location 6448 // information of the current target region. 
The name will be something like:
6449   //
6450   // __omp_offloading_DD_FFFF_PP_lBB
6451   //
6452   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6453   // mangled name of the function that encloses the target region and BB is the
6454   // line number of the target region.
6455 
6456   unsigned DeviceID;
6457   unsigned FileID;
6458   unsigned Line;
6459   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6460                            Line);
6461   SmallString<64> EntryFnName;
6462   {
6463     llvm::raw_svector_ostream OS(EntryFnName);
6464     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6465        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6466   }
6467 
6468   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6469 
6470   CodeGenFunction CGF(CGM, true);
6471   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6472   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6473 
6474   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
6475 
6476   // If this target outlined function is not an offload entry, we don't need
6477   // to register it.
6478   if (!IsOffloadEntry)
6479     return;
6480 
6481   // The target region ID is used by the runtime library to identify the current
6482   // target region, so it only has to be unique and not necessarily point to
6483   // anything. It could be the pointer to the outlined function that implements
6484   // the target region, but we aren't using that so that the compiler doesn't
6485   // need to keep that, and could therefore inline the host function if proven
6486   // worthwhile during optimization. On the other hand, if emitting code for the
6487   // device, the ID has to be the function address so that it can be retrieved
6488   // from the offloading entry and launched by the runtime library. We also mark
6489   // the outlined function to have external linkage in case we are emitting code
6490   // for the device, because these functions will be entry points to the device.
6491 
6492   if (CGM.getLangOpts().OpenMPIsDevice) {
6493     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6494     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6495     OutlinedFn->setDSOLocal(false);
6496   } else {
6497     std::string Name = getName({EntryFnName, "region_id"});
6498     OutlinedFnID = new llvm::GlobalVariable(
6499         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6500         llvm::GlobalValue::WeakAnyLinkage,
6501         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6502   }
6503 
6504   // Register the information for the entry associated with this target region.
6505   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6506       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6507       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6508 }
6509 
6510 /// Checks if the expression is constant or does not have non-trivial function
6511 /// calls.
6512 static bool isTrivial(ASTContext &Ctx, const Expr *E) {
6513   // We can skip constant expressions.
6514   // We can skip expressions with trivial calls or simple expressions.
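  // E.g. (illustrative) '42' or 'x + y' is trivial, while a call to an
  // arbitrary user function, or any expression with side effects, is not.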
6515   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6516           !E->hasNonTrivialCall(Ctx)) &&
6517          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6518 }
6519 
6520 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6521                                                     const Stmt *Body) {
6522   const Stmt *Child = Body->IgnoreContainers();
6523   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6524     Child = nullptr;
6525     for (const Stmt *S : C->body()) {
6526       if (const auto *E = dyn_cast<Expr>(S)) {
6527         if (isTrivial(Ctx, E))
6528           continue;
6529       }
6530       // Some of the statements can be ignored.
6531       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6532           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6533         continue;
6534       // Analyze declarations.
6535       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6536         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6537               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6538                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6539                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6540                   isa<UsingDirectiveDecl>(D) ||
6541                   isa<OMPDeclareReductionDecl>(D) ||
6542                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6543                 return true;
6544               const auto *VD = dyn_cast<VarDecl>(D);
6545               if (!VD)
6546                 return false;
6547               return VD->isConstexpr() ||
6548                      ((VD->getType().isTrivialType(Ctx) ||
6549                        VD->getType()->isReferenceType()) &&
6550                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6551             }))
6552           continue;
6553       }
6554       // Found multiple children - cannot return a single child.
6555       if (Child)
6556         return nullptr;
6557       Child = S;
6558     }
6559     if (Child)
6560       Child = Child->IgnoreContainers();
6561   }
6562   return Child;
6563 }
6564 
6565 /// Emit the number of teams for a target directive. Inspect the num_teams
6566 /// clause associated with a teams construct combined or closely nested
6567 /// with the target directive.
6568 ///
6569 /// Emit a team of size one for directives such as 'target parallel' that
6570 /// have no associated teams construct.
6571 ///
6572 /// Otherwise, return nullptr.
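///
/// For instance (illustrative):
/// \code
/// #pragma omp target teams num_teams(4) // emits 4 (cast to i32)
/// #pragma omp target parallel           // emits 1 (a single team)
/// #pragma omp target teams              // emits 0 (runtime default)
/// \endcode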
6573 static llvm::Value * 6574 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6575 const OMPExecutableDirective &D) { 6576 assert(!CGF.getLangOpts().OpenMPIsDevice && 6577 "Clauses associated with the teams directive expected to be emitted " 6578 "only for the host!"); 6579 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6580 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6581 "Expected target-based executable directive."); 6582 CGBuilderTy &Bld = CGF.Builder; 6583 switch (DirectiveKind) { 6584 case OMPD_target: { 6585 const auto *CS = D.getInnermostCapturedStmt(); 6586 const auto *Body = 6587 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6588 const Stmt *ChildStmt = 6589 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6590 if (const auto *NestedDir = 6591 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6592 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6593 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6594 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6595 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6596 const Expr *NumTeams = 6597 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6598 llvm::Value *NumTeamsVal = 6599 CGF.EmitScalarExpr(NumTeams, 6600 /*IgnoreResultAssign*/ true); 6601 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6602 /*IsSigned=*/true); 6603 } 6604 return Bld.getInt32(0); 6605 } 6606 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6607 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6608 return Bld.getInt32(1); 6609 return Bld.getInt32(0); 6610 } 6611 return nullptr; 6612 } 6613 case OMPD_target_teams: 6614 case OMPD_target_teams_distribute: 6615 case OMPD_target_teams_distribute_simd: 6616 case OMPD_target_teams_distribute_parallel_for: 6617 case OMPD_target_teams_distribute_parallel_for_simd: { 6618 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6619 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 6620 const Expr *NumTeams = 6621 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6622 llvm::Value *NumTeamsVal = 6623 CGF.EmitScalarExpr(NumTeams, 6624 /*IgnoreResultAssign*/ true); 6625 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6626 /*IsSigned=*/true); 6627 } 6628 return Bld.getInt32(0); 6629 } 6630 case OMPD_target_parallel: 6631 case OMPD_target_parallel_for: 6632 case OMPD_target_parallel_for_simd: 6633 case OMPD_target_simd: 6634 return Bld.getInt32(1); 6635 case OMPD_parallel: 6636 case OMPD_for: 6637 case OMPD_parallel_for: 6638 case OMPD_parallel_sections: 6639 case OMPD_for_simd: 6640 case OMPD_parallel_for_simd: 6641 case OMPD_cancel: 6642 case OMPD_cancellation_point: 6643 case OMPD_ordered: 6644 case OMPD_threadprivate: 6645 case OMPD_allocate: 6646 case OMPD_task: 6647 case OMPD_simd: 6648 case OMPD_sections: 6649 case OMPD_section: 6650 case OMPD_single: 6651 case OMPD_master: 6652 case OMPD_critical: 6653 case OMPD_taskyield: 6654 case OMPD_barrier: 6655 case OMPD_taskwait: 6656 case OMPD_taskgroup: 6657 case OMPD_atomic: 6658 case OMPD_flush: 6659 case OMPD_teams: 6660 case OMPD_target_data: 6661 case OMPD_target_exit_data: 6662 case OMPD_target_enter_data: 6663 case OMPD_distribute: 6664 case OMPD_distribute_simd: 6665 case OMPD_distribute_parallel_for: 6666 case OMPD_distribute_parallel_for_simd: 6667 case OMPD_teams_distribute: 6668 case OMPD_teams_distribute_simd: 6669 case OMPD_teams_distribute_parallel_for: 6670 case OMPD_teams_distribute_parallel_for_simd: 6671 case 
OMPD_target_update:
6672   case OMPD_declare_simd:
6673   case OMPD_declare_target:
6674   case OMPD_end_declare_target:
6675   case OMPD_declare_reduction:
6676   case OMPD_declare_mapper:
6677   case OMPD_taskloop:
6678   case OMPD_taskloop_simd:
6679   case OMPD_requires:
6680   case OMPD_unknown:
6681     break;
6682   }
6683   llvm_unreachable("Unexpected directive kind.");
6684 }
6685 
6686 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6687                                   llvm::Value *DefaultThreadLimitVal) {
6688   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6689       CGF.getContext(), CS->getCapturedStmt());
6690   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6691     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6692       llvm::Value *NumThreads = nullptr;
6693       llvm::Value *CondVal = nullptr;
6694       // Handle the if clause. If an if clause is present, the number of
6695       // threads is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6696       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6697         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6698         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6699         const OMPIfClause *IfClause = nullptr;
6700         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6701           if (C->getNameModifier() == OMPD_unknown ||
6702               C->getNameModifier() == OMPD_parallel) {
6703             IfClause = C;
6704             break;
6705           }
6706         }
6707         if (IfClause) {
6708           const Expr *Cond = IfClause->getCondition();
6709           bool Result;
6710           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6711             if (!Result)
6712               return CGF.Builder.getInt32(1);
6713           } else {
6714             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6715             if (const auto *PreInit =
6716                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6717               for (const auto *I : PreInit->decls()) {
6718                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6719                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6720                 } else {
6721                   CodeGenFunction::AutoVarEmission Emission =
6722                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6723                   CGF.EmitAutoVarCleanups(Emission);
6724                 }
6725               }
6726             }
6727             CondVal = CGF.EvaluateExprAsBool(Cond);
6728           }
6729         }
6730       }
6731       // Check the value of the num_threads clause only if the if clause was
6732       // not specified or does not evaluate to false.
6733       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6734         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6735         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6736         const auto *NumThreadsClause =
6737             Dir->getSingleClause<OMPNumThreadsClause>();
6738         CodeGenFunction::LexicalScope Scope(
6739             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6740         if (const auto *PreInit =
6741                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6742           for (const auto *I : PreInit->decls()) {
6743             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6744               CGF.EmitVarDecl(cast<VarDecl>(*I));
6745             } else {
6746               CodeGenFunction::AutoVarEmission Emission =
6747                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6748               CGF.EmitAutoVarCleanups(Emission);
6749             }
6750           }
6751         }
6752         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6753         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6754                                                /*IsSigned=*/false);
6755         if (DefaultThreadLimitVal)
6756           NumThreads = CGF.Builder.CreateSelect(
6757               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6758               DefaultThreadLimitVal, NumThreads);
6759       } else {
6760         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6761                                            : CGF.Builder.getInt32(0);
6762       }
6763       // Process the condition of the if clause.
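      // Sketch: the final value is CondVal ? NumThreads : 1, completing the
      // <cond> ? (<numthreads> ? <numthreads> : 0) : 1 scheme described above.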
6764 if (CondVal) { 6765 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6766 CGF.Builder.getInt32(1)); 6767 } 6768 return NumThreads; 6769 } 6770 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6771 return CGF.Builder.getInt32(1); 6772 return DefaultThreadLimitVal; 6773 } 6774 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6775 : CGF.Builder.getInt32(0); 6776 } 6777 6778 /// Emit the number of threads for a target directive. Inspect the 6779 /// thread_limit clause associated with a teams construct combined or closely 6780 /// nested with the target directive. 6781 /// 6782 /// Emit the num_threads clause for directives such as 'target parallel' that 6783 /// have no associated teams construct. 6784 /// 6785 /// Otherwise, return nullptr. 6786 static llvm::Value * 6787 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 6788 const OMPExecutableDirective &D) { 6789 assert(!CGF.getLangOpts().OpenMPIsDevice && 6790 "Clauses associated with the teams directive expected to be emitted " 6791 "only for the host!"); 6792 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6793 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6794 "Expected target-based executable directive."); 6795 CGBuilderTy &Bld = CGF.Builder; 6796 llvm::Value *ThreadLimitVal = nullptr; 6797 llvm::Value *NumThreadsVal = nullptr; 6798 switch (DirectiveKind) { 6799 case OMPD_target: { 6800 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6801 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6802 return NumThreads; 6803 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6804 CGF.getContext(), CS->getCapturedStmt()); 6805 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6806 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6807 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6808 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6809 const auto *ThreadLimitClause = 6810 Dir->getSingleClause<OMPThreadLimitClause>(); 6811 CodeGenFunction::LexicalScope Scope( 6812 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6813 if (const auto *PreInit = 6814 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6815 for (const auto *I : PreInit->decls()) { 6816 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6817 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6818 } else { 6819 CodeGenFunction::AutoVarEmission Emission = 6820 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6821 CGF.EmitAutoVarCleanups(Emission); 6822 } 6823 } 6824 } 6825 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6826 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6827 ThreadLimitVal = 6828 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false); 6829 } 6830 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6831 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6832 CS = Dir->getInnermostCapturedStmt(); 6833 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6834 CGF.getContext(), CS->getCapturedStmt()); 6835 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6836 } 6837 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6838 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6839 CS = Dir->getInnermostCapturedStmt(); 6840 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6841 return NumThreads; 6842 } 6843 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6844 return Bld.getInt32(1); 6845 } 6846 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0);
6847   }
6848   case OMPD_target_teams: {
6849     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6850       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6851       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6852       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6853           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6854       ThreadLimitVal =
6855           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
6856     }
6857     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6858     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6859       return NumThreads;
6860     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6861         CGF.getContext(), CS->getCapturedStmt());
6862     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6863       if (Dir->getDirectiveKind() == OMPD_distribute) {
6864         CS = Dir->getInnermostCapturedStmt();
6865         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6866           return NumThreads;
6867       }
6868     }
6869     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6870   }
6871   case OMPD_target_teams_distribute:
6872     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6873       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6874       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6875       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6876           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6877       ThreadLimitVal =
6878           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
6879     }
6880     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6881   case OMPD_target_parallel:
6882   case OMPD_target_parallel_for:
6883   case OMPD_target_parallel_for_simd:
6884   case OMPD_target_teams_distribute_parallel_for:
6885   case OMPD_target_teams_distribute_parallel_for_simd: {
6886     llvm::Value *CondVal = nullptr;
6887     // Handle the if clause. If an if clause is present, the number of threads
6888     // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
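    // E.g. (illustrative) for
    //   #pragma omp target teams distribute parallel for \
    //           if(c) num_threads(n) thread_limit(t)
    // the emitted value is roughly c ? min(n, t) : 1, with the min computed
    // via the unsigned compare-and-select below.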
6889 if (D.hasClausesOfKind<OMPIfClause>()) { 6890 const OMPIfClause *IfClause = nullptr; 6891 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 6892 if (C->getNameModifier() == OMPD_unknown || 6893 C->getNameModifier() == OMPD_parallel) { 6894 IfClause = C; 6895 break; 6896 } 6897 } 6898 if (IfClause) { 6899 const Expr *Cond = IfClause->getCondition(); 6900 bool Result; 6901 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6902 if (!Result) 6903 return Bld.getInt32(1); 6904 } else { 6905 CodeGenFunction::RunCleanupsScope Scope(CGF); 6906 CondVal = CGF.EvaluateExprAsBool(Cond); 6907 } 6908 } 6909 } 6910 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6911 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6912 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6913 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6914 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6915 ThreadLimitVal = 6916 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false); 6917 } 6918 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6919 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6920 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6921 llvm::Value *NumThreads = CGF.EmitScalarExpr( 6922 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 6923 NumThreadsVal = 6924 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/false); 6925 ThreadLimitVal = ThreadLimitVal 6926 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 6927 ThreadLimitVal), 6928 NumThreadsVal, ThreadLimitVal) 6929 : NumThreadsVal; 6930 } 6931 if (!ThreadLimitVal) 6932 ThreadLimitVal = Bld.getInt32(0); 6933 if (CondVal) 6934 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 6935 return ThreadLimitVal; 6936 } 6937 case OMPD_target_teams_distribute_simd: 6938 case OMPD_target_simd: 6939 return Bld.getInt32(1); 6940 case OMPD_parallel: 6941 case OMPD_for: 6942 case OMPD_parallel_for: 6943 case OMPD_parallel_sections: 6944 case OMPD_for_simd: 6945 case OMPD_parallel_for_simd: 6946 case OMPD_cancel: 6947 case OMPD_cancellation_point: 6948 case OMPD_ordered: 6949 case OMPD_threadprivate: 6950 case OMPD_allocate: 6951 case OMPD_task: 6952 case OMPD_simd: 6953 case OMPD_sections: 6954 case OMPD_section: 6955 case OMPD_single: 6956 case OMPD_master: 6957 case OMPD_critical: 6958 case OMPD_taskyield: 6959 case OMPD_barrier: 6960 case OMPD_taskwait: 6961 case OMPD_taskgroup: 6962 case OMPD_atomic: 6963 case OMPD_flush: 6964 case OMPD_teams: 6965 case OMPD_target_data: 6966 case OMPD_target_exit_data: 6967 case OMPD_target_enter_data: 6968 case OMPD_distribute: 6969 case OMPD_distribute_simd: 6970 case OMPD_distribute_parallel_for: 6971 case OMPD_distribute_parallel_for_simd: 6972 case OMPD_teams_distribute: 6973 case OMPD_teams_distribute_simd: 6974 case OMPD_teams_distribute_parallel_for: 6975 case OMPD_teams_distribute_parallel_for_simd: 6976 case OMPD_target_update: 6977 case OMPD_declare_simd: 6978 case OMPD_declare_target: 6979 case OMPD_end_declare_target: 6980 case OMPD_declare_reduction: 6981 case OMPD_declare_mapper: 6982 case OMPD_taskloop: 6983 case OMPD_taskloop_simd: 6984 case OMPD_requires: 6985 case OMPD_unknown: 6986 break; 6987 } 6988 llvm_unreachable("Unsupported directive kind."); 6989 } 6990 6991 namespace { 6992 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 6993 6994 // Utility to handle information from clauses associated with a given 6995 // construct that use mappable expressions (e.g. 
'map' clause, 'to' clause).
6996 // It provides a convenient interface to obtain the information and generate
6997 // code for that information.
6998 class MappableExprsHandler {
6999 public:
7000   /// Values for bit flags used to specify the mapping type for
7001   /// offloading.
7002   enum OpenMPOffloadMappingFlags : uint64_t {
7003     /// No flags
7004     OMP_MAP_NONE = 0x0,
7005     /// Allocate memory on the device and move data from host to device.
7006     OMP_MAP_TO = 0x01,
7007     /// Allocate memory on the device and move data from device to host.
7008     OMP_MAP_FROM = 0x02,
7009     /// Always perform the requested mapping action on the element, even
7010     /// if it was already mapped before.
7011     OMP_MAP_ALWAYS = 0x04,
7012     /// Delete the element from the device environment, ignoring the
7013     /// current reference count associated with the element.
7014     OMP_MAP_DELETE = 0x08,
7015     /// The element being mapped is a pointer-pointee pair; both the
7016     /// pointer and the pointee should be mapped.
7017     OMP_MAP_PTR_AND_OBJ = 0x10,
7018     /// This flag signals that the base address of an entry should be
7019     /// passed to the target kernel as an argument.
7020     OMP_MAP_TARGET_PARAM = 0x20,
7021     /// Signal that the runtime library has to return the device pointer
7022     /// in the current position for the data being mapped. Used when we have the
7023     /// use_device_ptr clause.
7024     OMP_MAP_RETURN_PARAM = 0x40,
7025     /// This flag signals that the reference being passed is a pointer to
7026     /// private data.
7027     OMP_MAP_PRIVATE = 0x80,
7028     /// Pass the element to the device by value.
7029     OMP_MAP_LITERAL = 0x100,
7030     /// Implicit map
7031     OMP_MAP_IMPLICIT = 0x200,
7032     /// The 16 MSBs of the flags indicate whether the entry is a member of
7033     /// some struct/class.
7034     OMP_MAP_MEMBER_OF = 0xffff000000000000,
7035     LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
7036   };
7037 
7038   /// Class that associates information with a base pointer to be passed to the
7039   /// runtime library.
7040   class BasePointerInfo {
7041     /// The base pointer.
7042     llvm::Value *Ptr = nullptr;
7043     /// The base declaration that refers to this device pointer, or null if
7044     /// there is none.
7045     const ValueDecl *DevPtrDecl = nullptr;
7046 
7047   public:
7048     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7049         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7050     llvm::Value *operator*() const { return Ptr; }
7051     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7052     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7053   };
7054 
7055   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7056   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7057   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7058 
7059   /// Map between a struct and its lowest & highest elements which have been
7060   /// mapped.
7061   /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7062   ///                    HE(FieldIndex, Pointer)}
7063   struct StructRangeInfoTy {
7064     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7065         0, Address::invalid()};
7066     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7067         0, Address::invalid()};
7068     Address Base = Address::invalid();
7069   };
7070 
7071 private:
7072   /// Map information for a given expression component list of a map clause.
7073 struct MapInfo { 7074 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7075 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7076 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7077 bool ReturnDevicePointer = false; 7078 bool IsImplicit = false; 7079 7080 MapInfo() = default; 7081 MapInfo( 7082 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7083 OpenMPMapClauseKind MapType, 7084 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7085 bool ReturnDevicePointer, bool IsImplicit) 7086 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7087 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} 7088 }; 7089 7090 /// If use_device_ptr is used on a pointer which is a struct member and there 7091 /// is no map information about it, then emission of that entry is deferred 7092 /// until the whole struct has been processed. 7093 struct DeferredDevicePtrEntryTy { 7094 const Expr *IE = nullptr; 7095 const ValueDecl *VD = nullptr; 7096 7097 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD) 7098 : IE(IE), VD(VD) {} 7099 }; 7100 7101 /// Directive from where the map clauses were extracted. 7102 const OMPExecutableDirective &CurDir; 7103 7104 /// Function the directive is being generated for. 7105 CodeGenFunction &CGF; 7106 7107 /// Set of all first private variables in the current directive. 7108 llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls; 7109 7110 /// Map between device pointer declarations and their expression components. 7111 /// The key value for declarations in 'this' is null. 7112 llvm::DenseMap< 7113 const ValueDecl *, 7114 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7115 DevPointersMap; 7116 7117 llvm::Value *getExprTypeSize(const Expr *E) const { 7118 QualType ExprTy = E->getType().getCanonicalType(); 7119 7120 // Reference types are ignored for mapping purposes. 7121 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7122 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7123 7124 // Given that an array section is considered a built-in type, we need to 7125 // do the calculation based on the length of the section instead of relying 7126 // on CGF.getTypeSize(E->getType()). 7127 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7128 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7129 OAE->getBase()->IgnoreParenImpCasts()) 7130 .getCanonicalType(); 7131 7132 // If there is no length associated with the expression, that means we 7133 // are using the whole length of the base. 7134 if (!OAE->getLength() && OAE->getColonLoc().isValid()) 7135 return CGF.getTypeSize(BaseTy); 7136 7137 llvm::Value *ElemSize; 7138 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7139 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7140 } else { 7141 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7142 assert(ATy && "Expecting array type if not a pointer type."); 7143 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7144 } 7145 7146 // If we don't have a length at this point, that is because we have an 7147 // array section with a single element. 
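      // In that case the size is just the element size; sections with an
      // explicit length, e.g. 'map(p[0:n])' (illustrative), fall through to
      // the multiply below.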
      if (!OAE->getLength())
        return ElemSize;

      llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
      LengthVal =
          CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
      return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits, alloc/release is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these
      // two type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
        != MapModifiers.end())
      Bits |= OMP_MAP_ALWAYS;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLoc().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have more than size 1.
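    // Illustrative examples (not from the original source): given
    //   int a[10];
    // the section a[0:1] has constant length 1 and is not final, while a[0:n]
    // must be treated as final because its length is only known at run time.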

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    bool IsLink = false; // Is this variable a "declare target link"?

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
          if (*Res == OMPDeclareTargetDeclAttr::MT_Link) {
            IsLink = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
          }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer; it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF; all subsequent
    // entries do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1)  (2)  (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2), which is not a member of struct s, so it should
    // not be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have
    // a pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member
    // expression. E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is a member of a struct (parent struct),
      // mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Array sections
      // need special treatment given that they are built-in types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      if (Next == CE || IsPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB =
            CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the
        // object it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointer =
            IsPointer && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on
          // the device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
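          // Illustrative note (hypothetical example, not from the source):
          // given "struct T { int a; int b; int c; } t;" with map(tofrom: t)
          // and an overlapping map of t.b, the loop below emits bitcopy
          // entries for the non-overlapped ranges [&t.a, &t.b) and
          // [&t.b + 1, &t + 1), skipping the separately mapped member.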
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress();
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(Size);
            Types.push_back(Flags);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(Size);
          Types.push_back(Flags);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointer) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(Size);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, IsImplicit,
              !IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as
            // well, then we reset the TO/FROM/ALWAYS/DELETE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }
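        // Illustrative note (not from the source): for map(to: s.p[:22]) the
        // pointee entry computed above carries PTR_AND_OBJ with the 0xFFFF
        // MEMBER_OF placeholder; emitCombinedEntry()/setCorrectMemberOfFlag()
        // later rewrite the placeholder to the index of the enclosing
        // struct's entry.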
        // If we have encountered a member expression so far, keep track of
        // the mapped member. If the parent is "*this", then the value
        // declaration is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct.
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            PartialStruct.HighestElem = {FieldIndex, LB};
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
          Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
        return MappableExprsHandler::OMP_MAP_ALWAYS |
               MappableExprsHandler::OMP_MAP_TO;
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // The MEMBER_OF field occupies the 16 MSBs of the flag, so shift by 48
    // bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << 48);
  }

  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of
    // the proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields are skipped here; they are not
      // mapped individually.)
      if (!Field->isBitField()) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.insert(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[L.first].push_back(L.second);
  }

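  // Illustrative note (hypothetical example, not from the source): for
  //   struct S2 s;
  //   #pragma omp target map(s.i) map(s.p)
  // the handler emits one combined TARGET_PARAM entry spanning
  // [&s.i, &s.p + 1) and rewrites the member entries to MEMBER_OF(<position
  // of that combined entry>); see emitCombinedEntry() below.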
  /// Generate code for the combined entry if we have a partially mapped
  /// struct and take care of the mapping flags of the arguments corresponding
  /// to individual struct members.
  void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
                         MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                         MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct) const {
    // Base is the base of the struct.
    BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element.
    llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
    Pointers.push_back(LB);
    // Size is (addr of {highest+1} element) - (addr of lowest element).
    llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.SizeTy,
                                                  /*isSigned=*/false);
    Sizes.push_back(Size);
    // Map type is always TARGET_PARAM.
    Types.push_back(OMP_MAP_TARGET_PARAM);
    // Remove TARGET_PARAM flag from the first element.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions. Also, for each item that
  /// relates with a device pointer, a pair of the relevant declaration and
  /// index where it occurs is appended to the device pointers info array.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };

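    // Illustrative note (not from the source): after processing
    //   map(to: a) map(from: a[0:n])
    // Info[<canonical decl of a>] holds two MapInfo entries, which are later
    // walked together so the TARGET_PARAM/MEMBER_OF flags for 'a' stay
    // consistent across both lists.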
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C :
         this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto &L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information, so we generate a
        // zero size array section; if the pointer is a struct member, we
        // defer this action until the whole struct has been processed.
        // FIXME: MSVC 2013 seems to require this-> to find member CGF.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD);
        } else {
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays.
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        // FIXME: MSVC 2013 seems to require this-> to find the member method.
        this->generateInfoForComponentList(
            L.MapType, L.MapModifiers, L.Components, CurBasePointers,
            CurPointers, CurSizes, CurTypes, PartialStruct,
            IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                             OMP_MAP_MEMBER_OF);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

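      // Illustrative note (hypothetical example, not from the source): for
      //   #pragma omp target data map(s) use_device_ptr(s.p)
      // the deferred s.p entry appended above ends up as PTR_AND_OBJ |
      // RETURN_PARAM with the MEMBER_OF placeholder, which is resolved once
      // the combined entry for 's' has been emitted.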
      // We need to append the results of this capture to what we already
      // have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }

  /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(ThisLVal.getPointer());
      Pointers.push_back(ThisLValVal.getPointer());
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (LC.getCaptureKind() != LCK_ByRef)
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
      LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(VarLVal.getPointer());
      Pointers.push_back(VarLValVal.getPointer());
      Sizes.push_back(CGF.getTypeSize(
          VD->getType().getCanonicalType().getNonReferenceType()));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }

  /// Set correct indices for lambda captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
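      // Illustrative note (not from the source): for an implicitly mapped
      // lambda "[&x]() { ... }", the search above locates the entry whose
      // pointer is the lambda object itself; TgtIdx is that entry's position
      // and becomes the MEMBER_OF index for the capture of 'x'.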
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component associated with a capture.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // An overlap is found if, for at least one of the two lists, we
        // consumed the whole components list: one expression is then a
        // sub-object of the other.
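        // Illustrative note (hypothetical example, not from the source):
        // map(s.ps->s) and map(s.ps->s.i) share the component prefix
        // s -> ps -> s; the shorter list is consumed first, so it becomes
        // the base and s.ps->s.i is recorded as one of its overlapped
        // sub-components.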
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // A list with fewer elements is ordered before a list with more
            // elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // We need to know when we generate information for the first component
    // list associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with overlapped elements first.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through the other elements without overlapped elements.
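    // Illustrative note (not from the source): lists that already produced
    // entries in the overlapped pass above are skipped below, and
    // IsFirstComponentList starts as false in that case so the remaining
    // lists do not claim the TARGET_PARAM slot again.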
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }

  /// Generate the base pointers, section pointers, sizes and map types
  /// associated with the declare target link variables.
  void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
                                        MapValuesArrayTy &Pointers,
                                        MapValuesArrayTy &Sizes,
                                        MapFlagsArrayTy &Types) const {
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->component_lists()) {
        if (!L.first)
          continue;
        const auto *VD = dyn_cast<VarDecl>(L.first);
        if (!VD)
          continue;
        llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
            OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
        if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
          continue;
        StructRangeInfoTy PartialStruct;
        generateInfoForComponentList(
            C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
            Pointers, Sizes, Types, PartialStruct,
            /*IsFirstComponentList=*/true, C->isImplicit());
        assert(!PartialStruct.Base.isValid() &&
               "No partial structs for declare target link expected.");
      }
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    // Do the default mapping.
    if (CI.capturesThis()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime which captures are passed by value
        // and are not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.getTypeSize(RI.getType()));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than the first map flag that is added for all implicit
        // maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
      }
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.getTypeSize(ElementType));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      if (FirstPrivateDecls.count(VD) &&
          VD->getType().isConstant(CGF.getContext())) {
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*isVolatile=*/false);
        // Use the new global variable as the base pointer.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (FirstPrivateDecls.count(VD) && ElementType->isAnyPointerType()) {
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
    }
    // Every default map produces a single argument which is a target
    // parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add a flag stating this is an implicit map.
    CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID used when the device was not explicitly defined; the runtime
  /// should then get it from the environment variables described in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

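  // Illustrative note (hypothetical shapes, not from the source): for a
  // region with two captures this routine materializes
  //   .offload_baseptrs : [2 x i8*]  (alloca, filled below)
  //   .offload_ptrs     : [2 x i8*]  (alloca, filled below)
  //   .offload_sizes    : two size_t slots (a constant global when all sizes
  //                       are compile-time constants)
  //   .offload_maptypes : [2 x i64]  (always a constant global)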
  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType =
        Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.SizeTy, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}

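// Illustrative note (not from the source): these arrays are what the
// device-agnostic libomptarget entry points ultimately consume, roughly as
//   __tgt_target(device_id, host_ptr, num_args,
//                baseptrs, ptrs, sizes, maptypes);
// where the pointer parameters are the arrays emitted above.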
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of pointers, sizes and map types.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
  } else {
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
  }
}

/// Check for inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
8574 case OMPD_taskwait: 8575 case OMPD_taskgroup: 8576 case OMPD_atomic: 8577 case OMPD_flush: 8578 case OMPD_teams: 8579 case OMPD_target_data: 8580 case OMPD_target_exit_data: 8581 case OMPD_target_enter_data: 8582 case OMPD_distribute: 8583 case OMPD_distribute_simd: 8584 case OMPD_distribute_parallel_for: 8585 case OMPD_distribute_parallel_for_simd: 8586 case OMPD_teams_distribute: 8587 case OMPD_teams_distribute_simd: 8588 case OMPD_teams_distribute_parallel_for: 8589 case OMPD_teams_distribute_parallel_for_simd: 8590 case OMPD_target_update: 8591 case OMPD_declare_simd: 8592 case OMPD_declare_target: 8593 case OMPD_end_declare_target: 8594 case OMPD_declare_reduction: 8595 case OMPD_declare_mapper: 8596 case OMPD_taskloop: 8597 case OMPD_taskloop_simd: 8598 case OMPD_requires: 8599 case OMPD_unknown: 8600 llvm_unreachable("Unexpected directive."); 8601 } 8602 } 8603 8604 return nullptr; 8605 } 8606 8607 void CGOpenMPRuntime::emitTargetNumIterationsCall( 8608 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device, 8609 const llvm::function_ref<llvm::Value *( 8610 CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) { 8611 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 8612 const OMPExecutableDirective *TD = &D; 8613 // Get nested teams distribute kind directive, if any. 8614 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 8615 TD = getNestedDistributeDirective(CGM.getContext(), D); 8616 if (!TD) 8617 return; 8618 const auto *LD = cast<OMPLoopDirective>(TD); 8619 auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF, 8620 PrePostActionTy &) { 8621 llvm::Value *NumIterations = SizeEmitter(CGF, *LD); 8622 8623 // Emit device ID if any. 8624 llvm::Value *DeviceID; 8625 if (Device) 8626 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 8627 CGF.Int64Ty, /*isSigned=*/true); 8628 else 8629 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 8630 8631 llvm::Value *Args[] = {DeviceID, NumIterations}; 8632 CGF.EmitRuntimeCall( 8633 createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); 8634 }; 8635 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 8636 } 8637 8638 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, 8639 const OMPExecutableDirective &D, 8640 llvm::Function *OutlinedFn, 8641 llvm::Value *OutlinedFnID, 8642 const Expr *IfCond, const Expr *Device) { 8643 if (!CGF.HaveInsertPoint()) 8644 return; 8645 8646 assert(OutlinedFn && "Invalid outlined function!"); 8647 8648 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 8649 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 8650 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 8651 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 8652 PrePostActionTy &) { 8653 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 8654 }; 8655 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 8656 8657 CodeGenFunction::OMPTargetDataInfo InputInfo; 8658 llvm::Value *MapTypesArray = nullptr; 8659 // Fill up the pointer arrays and transfer execution to the device. 8660 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 8661 &MapTypesArray, &CS, RequiresOuterTask, 8662 &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) { 8663 // On top of the arrays that were filled up, the target offloading call 8664 // takes as arguments the device id as well as the host pointer. 
The host
8665     // pointer is used by the runtime library to identify the current target
8666     // region, so it only has to be unique and not necessarily point to
8667     // anything. It could be the pointer to the outlined function that
8668     // implements the target region, but we aren't using it so that the
8669     // compiler doesn't need to keep it alive, and can therefore inline the host
8670     // function if that proves worthwhile during optimization.
8671 
8672     // From this point on, we need to have an ID of the target region defined.
8673     assert(OutlinedFnID && "Invalid outlined function ID!");
8674 
8675     // Emit device ID if any.
8676     llvm::Value *DeviceID;
8677     if (Device) {
8678       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
8679                                            CGF.Int64Ty, /*isSigned=*/true);
8680     } else {
8681       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
8682     }
8683 
8684     // Emit the number of elements in the offloading arrays.
8685     llvm::Value *PointerNum =
8686         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
8687 
8688     // Return value of the runtime offloading call.
8689     llvm::Value *Return;
8690 
8691     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
8692     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
8693 
8694     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
8695     // The target region is an outlined function launched by the runtime
8696     // via calls to __tgt_target() or __tgt_target_teams().
8697     //
8698     // __tgt_target() launches a target region with one team and one thread,
8699     // executing a serial region. This master thread may in turn launch
8700     // more threads within its team upon encountering a parallel region;
8701     // however, no additional teams can be launched on the device.
8702     //
8703     // __tgt_target_teams() launches a target region with one or more teams,
8704     // each with one or more threads. This call is required for target
8705     // constructs such as:
8706     //   'target teams'
8707     //   'target' / 'teams'
8708     //   'target teams distribute parallel for'
8709     //   'target parallel'
8710     // and so on.
8711     //
8712     // Note that on the host and CPU targets, the runtime implementation of
8713     // these calls simply calls the outlined function without forking threads.
8714     // The outlined functions themselves have runtime calls to
8715     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
8716     // the compiler in emitTeamsCall() and emitParallelCall().
8717     //
8718     // In contrast, on the NVPTX target, the implementation of
8719     // __tgt_target_teams() launches a GPU kernel with the requested number
8720     // of teams and threads so no additional calls to the runtime are required.
8721     if (NumTeams) {
8722       // If we have NumTeams defined this means that we have an enclosed teams
8723       // region. Therefore we also expect to have NumThreads defined. These two
8724       // values should be defined in the presence of a teams directive,
8725       // regardless of having any clauses associated. If the user is using teams
8726       // but no clauses, these two values will be the default that should be
8727       // passed to the runtime library - a 32-bit integer with the value zero.
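      // For example (illustrative only): a '#pragma omp target teams'
      // construct reaches this branch and is launched via
      // __tgt_target_teams(), while a bare '#pragma omp target' takes the
      // else branch below and is launched via __tgt_target().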
8728 assert(NumThreads && "Thread limit expression should be available along " 8729 "with number of teams."); 8730 llvm::Value *OffloadingArgs[] = {DeviceID, 8731 OutlinedFnID, 8732 PointerNum, 8733 InputInfo.BasePointersArray.getPointer(), 8734 InputInfo.PointersArray.getPointer(), 8735 InputInfo.SizesArray.getPointer(), 8736 MapTypesArray, 8737 NumTeams, 8738 NumThreads}; 8739 Return = CGF.EmitRuntimeCall( 8740 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait 8741 : OMPRTL__tgt_target_teams), 8742 OffloadingArgs); 8743 } else { 8744 llvm::Value *OffloadingArgs[] = {DeviceID, 8745 OutlinedFnID, 8746 PointerNum, 8747 InputInfo.BasePointersArray.getPointer(), 8748 InputInfo.PointersArray.getPointer(), 8749 InputInfo.SizesArray.getPointer(), 8750 MapTypesArray}; 8751 Return = CGF.EmitRuntimeCall( 8752 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait 8753 : OMPRTL__tgt_target), 8754 OffloadingArgs); 8755 } 8756 8757 // Check the error code and execute the host version if required. 8758 llvm::BasicBlock *OffloadFailedBlock = 8759 CGF.createBasicBlock("omp_offload.failed"); 8760 llvm::BasicBlock *OffloadContBlock = 8761 CGF.createBasicBlock("omp_offload.cont"); 8762 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 8763 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 8764 8765 CGF.EmitBlock(OffloadFailedBlock); 8766 if (RequiresOuterTask) { 8767 CapturedVars.clear(); 8768 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 8769 } 8770 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 8771 CGF.EmitBranch(OffloadContBlock); 8772 8773 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 8774 }; 8775 8776 // Notify that the host version must be executed. 8777 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 8778 RequiresOuterTask](CodeGenFunction &CGF, 8779 PrePostActionTy &) { 8780 if (RequiresOuterTask) { 8781 CapturedVars.clear(); 8782 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 8783 } 8784 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 8785 }; 8786 8787 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 8788 &CapturedVars, RequiresOuterTask, 8789 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 8790 // Fill up the arrays with all the captured variables. 8791 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 8792 MappableExprsHandler::MapValuesArrayTy Pointers; 8793 MappableExprsHandler::MapValuesArrayTy Sizes; 8794 MappableExprsHandler::MapFlagsArrayTy MapTypes; 8795 8796 // Get mappable expression information. 8797 MappableExprsHandler MEHandler(D, CGF); 8798 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 8799 8800 auto RI = CS.getCapturedRecordDecl()->field_begin(); 8801 auto CV = CapturedVars.begin(); 8802 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 8803 CE = CS.capture_end(); 8804 CI != CE; ++CI, ++RI, ++CV) { 8805 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 8806 MappableExprsHandler::MapValuesArrayTy CurPointers; 8807 MappableExprsHandler::MapValuesArrayTy CurSizes; 8808 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 8809 MappableExprsHandler::StructRangeInfoTy PartialStruct; 8810 8811 // VLA sizes are passed to the outlined region by copy and do not have map 8812 // information associated. 
8813       if (CI->capturesVariableArrayType()) {
8814         CurBasePointers.push_back(*CV);
8815         CurPointers.push_back(*CV);
8816         CurSizes.push_back(CGF.getTypeSize(RI->getType()));
8817         // Copy to the device as an argument. No need to retrieve it.
8818         CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
8819                               MappableExprsHandler::OMP_MAP_TARGET_PARAM);
8820       } else {
8821         // If we have any information in the map clause, we use it; otherwise
8822         // we just do a default mapping.
8823         MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
8824                                          CurSizes, CurMapTypes, PartialStruct);
8825         if (CurBasePointers.empty())
8826           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
8827                                            CurPointers, CurSizes, CurMapTypes);
8828         // Generate correct mapping for variables captured by reference in
8829         // lambdas.
8830         if (CI->capturesVariable())
8831           MEHandler.generateInfoForLambdaCaptures(
8832               CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
8833               CurMapTypes, LambdaPointers);
8834       }
8835       // We expect to have at least one element of information for this capture.
8836       assert(!CurBasePointers.empty() &&
8837              "Non-existing map pointer for capture!");
8838       assert(CurBasePointers.size() == CurPointers.size() &&
8839              CurBasePointers.size() == CurSizes.size() &&
8840              CurBasePointers.size() == CurMapTypes.size() &&
8841              "Inconsistent map information sizes!");
8842 
8843       // If there is an entry in PartialStruct it means we have a struct with
8844       // individual members mapped. Emit an extra combined entry.
8845       if (PartialStruct.Base.isValid())
8846         MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
8847                                     CurMapTypes, PartialStruct);
8848 
8849       // We need to append the results of this capture to what we already have.
8850       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8851       Pointers.append(CurPointers.begin(), CurPointers.end());
8852       Sizes.append(CurSizes.begin(), CurSizes.end());
8853       MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
8854     }
8855     // Adjust MEMBER_OF flags for the lambda captures.
8856     MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
8857                                               Pointers, MapTypes);
8858     // Map other list items in the map clause which are not captured variables
8859     // but "declare target link" global variables.
8860     MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
8861                                                MapTypes);
8862 
8863     TargetDataInfo Info;
8864     // Fill up the arrays and create the arguments.
8865     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
8866     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
8867                                  Info.PointersArray, Info.SizesArray,
8868                                  Info.MapTypesArray, Info);
8869     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
8870     InputInfo.BasePointersArray =
8871         Address(Info.BasePointersArray, CGM.getPointerAlign());
8872     InputInfo.PointersArray =
8873         Address(Info.PointersArray, CGM.getPointerAlign());
8874     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
8875     MapTypesArray = Info.MapTypesArray;
8876     if (RequiresOuterTask)
8877       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
8878     else
8879       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
8880   };
8881 
8882   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
8883                              CodeGenFunction &CGF, PrePostActionTy &) {
8884     if (RequiresOuterTask) {
8885       CodeGenFunction::OMPTargetDataInfo InputInfo;
8886       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
8887     } else {
8888       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
8889     }
8890   };
8891 
8892   // If we have a target function ID it means that we need to support
8893   // offloading; otherwise, just execute on the host. We need to execute on
8894   // the host regardless of the conditional in the if clause if, e.g., the
8895   // user does not specify target triples.
8896   if (OutlinedFnID) {
8897     if (IfCond) {
8898       emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
8899     } else {
8900       RegionCodeGenTy ThenRCG(TargetThenGen);
8901       ThenRCG(CGF);
8902     }
8903   } else {
8904     RegionCodeGenTy ElseRCG(TargetElseGen);
8905     ElseRCG(CGF);
8906   }
8907 }
8908 
8909 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
8910                                                     StringRef ParentName) {
8911   if (!S)
8912     return;
8913 
8914   // Codegen OMP target directives that offload compute to the device.
8915   bool RequiresDeviceCodegen =
8916       isa<OMPExecutableDirective>(S) &&
8917       isOpenMPTargetExecutionDirective(
8918           cast<OMPExecutableDirective>(S)->getDirectiveKind());
8919 
8920   if (RequiresDeviceCodegen) {
8921     const auto &E = *cast<OMPExecutableDirective>(S);
8922     unsigned DeviceID;
8923     unsigned FileID;
8924     unsigned Line;
8925     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
8926                              FileID, Line);
8927 
8928     // Is this a target region that should not be emitted as an entry point? If
8929     // so, just signal we are done with this target region.
8930 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 8931 ParentName, Line)) 8932 return; 8933 8934 switch (E.getDirectiveKind()) { 8935 case OMPD_target: 8936 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 8937 cast<OMPTargetDirective>(E)); 8938 break; 8939 case OMPD_target_parallel: 8940 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 8941 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 8942 break; 8943 case OMPD_target_teams: 8944 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 8945 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 8946 break; 8947 case OMPD_target_teams_distribute: 8948 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 8949 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 8950 break; 8951 case OMPD_target_teams_distribute_simd: 8952 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 8953 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 8954 break; 8955 case OMPD_target_parallel_for: 8956 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 8957 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 8958 break; 8959 case OMPD_target_parallel_for_simd: 8960 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 8961 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 8962 break; 8963 case OMPD_target_simd: 8964 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 8965 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 8966 break; 8967 case OMPD_target_teams_distribute_parallel_for: 8968 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 8969 CGM, ParentName, 8970 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 8971 break; 8972 case OMPD_target_teams_distribute_parallel_for_simd: 8973 CodeGenFunction:: 8974 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 8975 CGM, ParentName, 8976 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 8977 break; 8978 case OMPD_parallel: 8979 case OMPD_for: 8980 case OMPD_parallel_for: 8981 case OMPD_parallel_sections: 8982 case OMPD_for_simd: 8983 case OMPD_parallel_for_simd: 8984 case OMPD_cancel: 8985 case OMPD_cancellation_point: 8986 case OMPD_ordered: 8987 case OMPD_threadprivate: 8988 case OMPD_allocate: 8989 case OMPD_task: 8990 case OMPD_simd: 8991 case OMPD_sections: 8992 case OMPD_section: 8993 case OMPD_single: 8994 case OMPD_master: 8995 case OMPD_critical: 8996 case OMPD_taskyield: 8997 case OMPD_barrier: 8998 case OMPD_taskwait: 8999 case OMPD_taskgroup: 9000 case OMPD_atomic: 9001 case OMPD_flush: 9002 case OMPD_teams: 9003 case OMPD_target_data: 9004 case OMPD_target_exit_data: 9005 case OMPD_target_enter_data: 9006 case OMPD_distribute: 9007 case OMPD_distribute_simd: 9008 case OMPD_distribute_parallel_for: 9009 case OMPD_distribute_parallel_for_simd: 9010 case OMPD_teams_distribute: 9011 case OMPD_teams_distribute_simd: 9012 case OMPD_teams_distribute_parallel_for: 9013 case OMPD_teams_distribute_parallel_for_simd: 9014 case OMPD_target_update: 9015 case OMPD_declare_simd: 9016 case OMPD_declare_target: 9017 case OMPD_end_declare_target: 9018 case OMPD_declare_reduction: 9019 case OMPD_declare_mapper: 9020 case OMPD_taskloop: 9021 case OMPD_taskloop_simd: 9022 case OMPD_requires: 9023 case OMPD_unknown: 9024 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 9025 } 9026 return; 9027 } 9028 9029 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 9030 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 9031 
return;
9032 
9033     scanForTargetRegionsFunctions(
9034         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
9035     return;
9036   }
9037 
9038   // If this is a lambda function, look into its body.
9039   if (const auto *L = dyn_cast<LambdaExpr>(S))
9040     S = L->getBody();
9041 
9042   // Keep looking for target regions recursively.
9043   for (const Stmt *II : S->children())
9044     scanForTargetRegionsFunctions(II, ParentName);
9045 }
9046 
9047 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9048   // If emitting code for the host, we do not process FD here. Instead we do
9049   // the normal code generation.
9050   if (!CGM.getLangOpts().OpenMPIsDevice)
9051     return false;
9052 
9053   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9054   StringRef Name = CGM.getMangledName(GD);
9055   // Try to detect target regions in the function.
9056   if (const auto *FD = dyn_cast<FunctionDecl>(VD))
9057     scanForTargetRegionsFunctions(FD->getBody(), Name);
9058 
9059   // Do not emit the function if it is not marked as declare target.
9060   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9061          AlreadyEmittedTargetFunctions.count(Name) == 0;
9062 }
9063 
9064 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9065   if (!CGM.getLangOpts().OpenMPIsDevice)
9066     return false;
9067 
9068   // Check if there are Ctors/Dtors in this declaration and look for target
9069   // regions in it. We use the complete variant to produce the kernel name
9070   // mangling.
9071   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9072   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9073     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9074       StringRef ParentName =
9075           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9076       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9077     }
9078     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9079       StringRef ParentName =
9080           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9081       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9082     }
9083   }
9084 
9085   // Do not emit the variable if it is not marked as declare target.
9086 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9087 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 9088 cast<VarDecl>(GD.getDecl())); 9089 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) { 9090 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 9091 return true; 9092 } 9093 return false; 9094 } 9095 9096 llvm::Constant * 9097 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 9098 const VarDecl *VD) { 9099 assert(VD->getType().isConstant(CGM.getContext()) && 9100 "Expected constant variable."); 9101 StringRef VarName; 9102 llvm::Constant *Addr; 9103 llvm::GlobalValue::LinkageTypes Linkage; 9104 QualType Ty = VD->getType(); 9105 SmallString<128> Buffer; 9106 { 9107 unsigned DeviceID; 9108 unsigned FileID; 9109 unsigned Line; 9110 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 9111 FileID, Line); 9112 llvm::raw_svector_ostream OS(Buffer); 9113 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 9114 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 9115 VarName = OS.str(); 9116 } 9117 Linkage = llvm::GlobalValue::InternalLinkage; 9118 Addr = 9119 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 9120 getDefaultFirstprivateAddressSpace()); 9121 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 9122 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 9123 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 9124 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9125 VarName, Addr, VarSize, 9126 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 9127 return Addr; 9128 } 9129 9130 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 9131 llvm::Constant *Addr) { 9132 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9133 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9134 if (!Res) { 9135 if (CGM.getLangOpts().OpenMPIsDevice) { 9136 // Register non-target variables being emitted in device code (debug info 9137 // may cause this). 9138 StringRef VarName = CGM.getMangledName(VD); 9139 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9140 } 9141 return; 9142 } 9143 // Register declare target variables. 9144 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 9145 StringRef VarName; 9146 CharUnits VarSize; 9147 llvm::GlobalValue::LinkageTypes Linkage; 9148 switch (*Res) { 9149 case OMPDeclareTargetDeclAttr::MT_To: 9150 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9151 VarName = CGM.getMangledName(VD); 9152 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 9153 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 9154 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 9155 } else { 9156 VarSize = CharUnits::Zero(); 9157 } 9158 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 9159 // Temp solution to prevent optimizations of the internal variables. 
9160     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9161       std::string RefName = getName({VarName, "ref"});
9162       if (!CGM.GetGlobalValue(RefName)) {
9163         llvm::Constant *AddrRef =
9164             getOrCreateInternalVariable(Addr->getType(), RefName);
9165         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9166         GVAddrRef->setConstant(/*Val=*/true);
9167         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9168         GVAddrRef->setInitializer(Addr);
9169         CGM.addCompilerUsedGlobal(GVAddrRef);
9170       }
9171     }
9172     break;
9173   case OMPDeclareTargetDeclAttr::MT_Link:
9174     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9175     if (CGM.getLangOpts().OpenMPIsDevice) {
9176       VarName = Addr->getName();
9177       Addr = nullptr;
9178     } else {
9179       VarName = getAddrOfDeclareTargetLink(VD).getName();
9180       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer());
9181     }
9182     VarSize = CGM.getPointerSize();
9183     Linkage = llvm::GlobalValue::WeakAnyLinkage;
9184     break;
9185   }
9186   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9187       VarName, Addr, VarSize, Flags, Linkage);
9188 }
9189 
9190 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9191   if (isa<FunctionDecl>(GD.getDecl()) ||
9192       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9193     return emitTargetFunctions(GD);
9194 
9195   return emitTargetGlobalVariable(GD);
9196 }
9197 
9198 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9199   for (const VarDecl *VD : DeferredGlobalVariables) {
9200     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9201         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9202     if (!Res)
9203       continue;
9204     if (*Res == OMPDeclareTargetDeclAttr::MT_To) {
9205       CGM.EmitGlobal(VD);
9206     } else {
9207       assert(*Res == OMPDeclareTargetDeclAttr::MT_Link &&
9208              "Expected to or link clauses.");
9209       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
9210     }
9211   }
9212 }
9213 
9214 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9215     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9216   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9217          "Expected target-based directive.");
9218 }
9219 
9220 void CGOpenMPRuntime::checkArchForUnifiedAddressing(
9221     const OMPRequiresDecl *D) {
9222   for (const OMPClause *Clause : D->clauselists()) {
9223     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9224       HasRequiresUnifiedSharedMemory = true;
9225       break;
9226     }
9227   }
9228 }
9229 
9230 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9231                                                        LangAS &AS) {
9232   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9233     return false;
9234   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9235   switch (A->getAllocatorType()) {
9236   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9237     // Not supported; fall back to the default memory space.
9238   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9239   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9240   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9241   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9242   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9243   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9244   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9245     AS = LangAS::Default;
9246     return true;
9247   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9248     llvm_unreachable("Expected predefined allocator for the variables with the "
9249                      "static storage.");
9250   }
9251   return false;
9252 }
9253 
9254 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9255     CodeGenModule &CGM)
9256     : CGM(CGM) {
9257   if (CGM.getLangOpts().OpenMPIsDevice) {
9258     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9259     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9260   }
9261 }
9262 
9263 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9264   if (CGM.getLangOpts().OpenMPIsDevice)
9265     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9266 }
9267 
9268 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9269   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9270     return true;
9271 
9272   StringRef Name = CGM.getMangledName(GD);
9273   const auto *D = cast<FunctionDecl>(GD.getDecl());
9274   // Do not emit the function if it is marked as declare target, as it was
9275   // already emitted.
9276   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9277     if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
9278       if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
9279         return !F->isDeclaration();
9280       return false;
9281     }
9282     return true;
9283   }
9284 
9285   return !AlreadyEmittedTargetFunctions.insert(Name).second;
9286 }
9287 
9288 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
9289   // If we don't have entries or if we are emitting code for the device, we
9290   // don't need to do anything.
9291   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
9292       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
9293       (OffloadEntriesInfoManager.empty() &&
9294        !HasEmittedDeclareTargetRegion &&
9295        !HasEmittedTargetRegion))
9296     return nullptr;
9297 
9298   // Create and register the function that handles the requires directives.
9299   ASTContext &C = CGM.getContext();
9300 
9301   llvm::Function *RequiresRegFn;
9302   {
9303     CodeGenFunction CGF(CGM);
9304     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
9305     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
9306     std::string ReqName = getName({"omp_offloading", "requires_reg"});
9307     RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
9308     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
9309     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
9310     // TODO: check for other requires clauses.
9311     // The requires directive takes effect only when a target region is
9312     // present in the compilation unit. Otherwise it is ignored and not
9313     // passed to the runtime. This prevents the runtime from throwing an
9314     // error for mismatching requires clauses across compilation units that
9315     // don't contain at least one target region.
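    // For example (illustrative): a compilation unit that contains a target
    // region and '#pragma omp requires unified_shared_memory' gets a
    // registration function that passes OMP_REQ_UNIFIED_SHARED_MEMORY to
    // __tgt_register_requires(); without any requires clauses the flags
    // stay OMP_REQ_NONE.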
9316     assert((HasEmittedTargetRegion ||
9317             HasEmittedDeclareTargetRegion ||
9318             !OffloadEntriesInfoManager.empty()) &&
9319            "Target or declare target region expected.");
9320     if (HasRequiresUnifiedSharedMemory)
9321       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
9322     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
9323                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
9324     CGF.FinishFunction();
9325   }
9326   return RequiresRegFn;
9327 }
9328 
9329 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
9330   // If we have offloading in the current module, we need to emit the entries
9331   // now and register the offloading descriptor.
9332   createOffloadEntriesAndInfoMetadata();
9333 
9334   // Create and register the offloading binary descriptors. This is the main
9335   // entity that captures all the information about offloading in the current
9336   // compilation unit.
9337   return createOffloadingBinaryDescriptorRegistration();
9338 }
9339 
9340 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9341                                     const OMPExecutableDirective &D,
9342                                     SourceLocation Loc,
9343                                     llvm::Function *OutlinedFn,
9344                                     ArrayRef<llvm::Value *> CapturedVars) {
9345   if (!CGF.HaveInsertPoint())
9346     return;
9347 
9348   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9349   CodeGenFunction::RunCleanupsScope Scope(CGF);
9350 
9351   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9352   llvm::Value *Args[] = {
9353       RTLoc,
9354       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9355       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9356   llvm::SmallVector<llvm::Value *, 16> RealArgs;
9357   RealArgs.append(std::begin(Args), std::end(Args));
9358   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9359 
9360   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9361   CGF.EmitRuntimeCall(RTLFn, RealArgs);
9362 }
9363 
9364 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9365                                          const Expr *NumTeams,
9366                                          const Expr *ThreadLimit,
9367                                          SourceLocation Loc) {
9368   if (!CGF.HaveInsertPoint())
9369     return;
9370 
9371   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9372 
9373   llvm::Value *NumTeamsVal =
9374       NumTeams
9375           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9376                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9377           : CGF.Builder.getInt32(0);
9378 
9379   llvm::Value *ThreadLimitVal =
9380       ThreadLimit
9381           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9382                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9383           : CGF.Builder.getInt32(0);
9384 
9385   // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
9386   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9387                                      ThreadLimitVal};
9388   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
9389                       PushNumTeamsArgs);
9390 }
9391 
9392 void CGOpenMPRuntime::emitTargetDataCalls(
9393     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
9394     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
9395   if (!CGF.HaveInsertPoint())
9396     return;
9397 
9398   // Action used to replace the default codegen action and turn privatization
9399   // off.
9400   PrePostActionTy NoPrivAction;
9401 
9402   // Generate the code for the opening of the data environment. Capture all the
9403   // arguments of the runtime call by reference because they are used in the
9404   // closing of the region.
9405 auto &&BeginThenGen = [this, &D, Device, &Info, 9406 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 9407 // Fill up the arrays with all the mapped variables. 9408 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9409 MappableExprsHandler::MapValuesArrayTy Pointers; 9410 MappableExprsHandler::MapValuesArrayTy Sizes; 9411 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9412 9413 // Get map clause information. 9414 MappableExprsHandler MCHandler(D, CGF); 9415 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 9416 9417 // Fill up the arrays and create the arguments. 9418 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 9419 9420 llvm::Value *BasePointersArrayArg = nullptr; 9421 llvm::Value *PointersArrayArg = nullptr; 9422 llvm::Value *SizesArrayArg = nullptr; 9423 llvm::Value *MapTypesArrayArg = nullptr; 9424 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 9425 SizesArrayArg, MapTypesArrayArg, Info); 9426 9427 // Emit device ID if any. 9428 llvm::Value *DeviceID = nullptr; 9429 if (Device) { 9430 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9431 CGF.Int64Ty, /*isSigned=*/true); 9432 } else { 9433 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9434 } 9435 9436 // Emit the number of elements in the offloading arrays. 9437 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 9438 9439 llvm::Value *OffloadingArgs[] = { 9440 DeviceID, PointerNum, BasePointersArrayArg, 9441 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 9442 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), 9443 OffloadingArgs); 9444 9445 // If device pointer privatization is required, emit the body of the region 9446 // here. It will have to be duplicated: with and without privatization. 9447 if (!Info.CaptureDeviceAddrMap.empty()) 9448 CodeGen(CGF); 9449 }; 9450 9451 // Generate code for the closing of the data region. 9452 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 9453 PrePostActionTy &) { 9454 assert(Info.isValid() && "Invalid data environment closing arguments."); 9455 9456 llvm::Value *BasePointersArrayArg = nullptr; 9457 llvm::Value *PointersArrayArg = nullptr; 9458 llvm::Value *SizesArrayArg = nullptr; 9459 llvm::Value *MapTypesArrayArg = nullptr; 9460 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 9461 SizesArrayArg, MapTypesArrayArg, Info); 9462 9463 // Emit device ID if any. 9464 llvm::Value *DeviceID = nullptr; 9465 if (Device) { 9466 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9467 CGF.Int64Ty, /*isSigned=*/true); 9468 } else { 9469 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9470 } 9471 9472 // Emit the number of elements in the offloading arrays. 9473 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 9474 9475 llvm::Value *OffloadingArgs[] = { 9476 DeviceID, PointerNum, BasePointersArrayArg, 9477 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 9478 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), 9479 OffloadingArgs); 9480 }; 9481 9482 // If we need device pointer privatization, we need to emit the body of the 9483 // region with no privatization in the 'else' branch of the conditional. 9484 // Otherwise, we don't have to do anything. 
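  // Illustrative shape of the code emitted for 'omp target data if(c)' when
  // device pointer privatization is required (a sketch, not the exact IR):
  //   if (c) { __tgt_target_data_begin(...); <body, privatized>; }
  //   else   { <body, not privatized>; }
  //   if (c) { __tgt_target_data_end(...); }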
9485 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 9486 PrePostActionTy &) { 9487 if (!Info.CaptureDeviceAddrMap.empty()) { 9488 CodeGen.setAction(NoPrivAction); 9489 CodeGen(CGF); 9490 } 9491 }; 9492 9493 // We don't have to do anything to close the region if the if clause evaluates 9494 // to false. 9495 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 9496 9497 if (IfCond) { 9498 emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 9499 } else { 9500 RegionCodeGenTy RCG(BeginThenGen); 9501 RCG(CGF); 9502 } 9503 9504 // If we don't require privatization of device pointers, we emit the body in 9505 // between the runtime calls. This avoids duplicating the body code. 9506 if (Info.CaptureDeviceAddrMap.empty()) { 9507 CodeGen.setAction(NoPrivAction); 9508 CodeGen(CGF); 9509 } 9510 9511 if (IfCond) { 9512 emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen); 9513 } else { 9514 RegionCodeGenTy RCG(EndThenGen); 9515 RCG(CGF); 9516 } 9517 } 9518 9519 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 9520 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 9521 const Expr *Device) { 9522 if (!CGF.HaveInsertPoint()) 9523 return; 9524 9525 assert((isa<OMPTargetEnterDataDirective>(D) || 9526 isa<OMPTargetExitDataDirective>(D) || 9527 isa<OMPTargetUpdateDirective>(D)) && 9528 "Expecting either target enter, exit data, or update directives."); 9529 9530 CodeGenFunction::OMPTargetDataInfo InputInfo; 9531 llvm::Value *MapTypesArray = nullptr; 9532 // Generate the code for the opening of the data environment. 9533 auto &&ThenGen = [this, &D, Device, &InputInfo, 9534 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 9535 // Emit device ID if any. 9536 llvm::Value *DeviceID = nullptr; 9537 if (Device) { 9538 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9539 CGF.Int64Ty, /*isSigned=*/true); 9540 } else { 9541 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9542 } 9543 9544 // Emit the number of elements in the offloading arrays. 9545 llvm::Constant *PointerNum = 9546 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 9547 9548 llvm::Value *OffloadingArgs[] = {DeviceID, 9549 PointerNum, 9550 InputInfo.BasePointersArray.getPointer(), 9551 InputInfo.PointersArray.getPointer(), 9552 InputInfo.SizesArray.getPointer(), 9553 MapTypesArray}; 9554 9555 // Select the right runtime function call for each expected standalone 9556 // directive. 9557 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 9558 OpenMPRTLFunction RTLFn; 9559 switch (D.getDirectiveKind()) { 9560 case OMPD_target_enter_data: 9561 RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait 9562 : OMPRTL__tgt_target_data_begin; 9563 break; 9564 case OMPD_target_exit_data: 9565 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait 9566 : OMPRTL__tgt_target_data_end; 9567 break; 9568 case OMPD_target_update: 9569 RTLFn = HasNowait ? 
OMPRTL__tgt_target_data_update_nowait 9570 : OMPRTL__tgt_target_data_update; 9571 break; 9572 case OMPD_parallel: 9573 case OMPD_for: 9574 case OMPD_parallel_for: 9575 case OMPD_parallel_sections: 9576 case OMPD_for_simd: 9577 case OMPD_parallel_for_simd: 9578 case OMPD_cancel: 9579 case OMPD_cancellation_point: 9580 case OMPD_ordered: 9581 case OMPD_threadprivate: 9582 case OMPD_allocate: 9583 case OMPD_task: 9584 case OMPD_simd: 9585 case OMPD_sections: 9586 case OMPD_section: 9587 case OMPD_single: 9588 case OMPD_master: 9589 case OMPD_critical: 9590 case OMPD_taskyield: 9591 case OMPD_barrier: 9592 case OMPD_taskwait: 9593 case OMPD_taskgroup: 9594 case OMPD_atomic: 9595 case OMPD_flush: 9596 case OMPD_teams: 9597 case OMPD_target_data: 9598 case OMPD_distribute: 9599 case OMPD_distribute_simd: 9600 case OMPD_distribute_parallel_for: 9601 case OMPD_distribute_parallel_for_simd: 9602 case OMPD_teams_distribute: 9603 case OMPD_teams_distribute_simd: 9604 case OMPD_teams_distribute_parallel_for: 9605 case OMPD_teams_distribute_parallel_for_simd: 9606 case OMPD_declare_simd: 9607 case OMPD_declare_target: 9608 case OMPD_end_declare_target: 9609 case OMPD_declare_reduction: 9610 case OMPD_declare_mapper: 9611 case OMPD_taskloop: 9612 case OMPD_taskloop_simd: 9613 case OMPD_target: 9614 case OMPD_target_simd: 9615 case OMPD_target_teams_distribute: 9616 case OMPD_target_teams_distribute_simd: 9617 case OMPD_target_teams_distribute_parallel_for: 9618 case OMPD_target_teams_distribute_parallel_for_simd: 9619 case OMPD_target_teams: 9620 case OMPD_target_parallel: 9621 case OMPD_target_parallel_for: 9622 case OMPD_target_parallel_for_simd: 9623 case OMPD_requires: 9624 case OMPD_unknown: 9625 llvm_unreachable("Unexpected standalone target data directive."); 9626 break; 9627 } 9628 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); 9629 }; 9630 9631 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 9632 CodeGenFunction &CGF, PrePostActionTy &) { 9633 // Fill up the arrays with all the mapped variables. 9634 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9635 MappableExprsHandler::MapValuesArrayTy Pointers; 9636 MappableExprsHandler::MapValuesArrayTy Sizes; 9637 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9638 9639 // Get map clause information. 9640 MappableExprsHandler MEHandler(D, CGF); 9641 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 9642 9643 TargetDataInfo Info; 9644 // Fill up the arrays and create the arguments. 
9645     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9646     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
9647                                  Info.PointersArray, Info.SizesArray,
9648                                  Info.MapTypesArray, Info);
9649     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9650     InputInfo.BasePointersArray =
9651         Address(Info.BasePointersArray, CGM.getPointerAlign());
9652     InputInfo.PointersArray =
9653         Address(Info.PointersArray, CGM.getPointerAlign());
9654     InputInfo.SizesArray =
9655         Address(Info.SizesArray, CGM.getPointerAlign());
9656     MapTypesArray = Info.MapTypesArray;
9657     if (D.hasClausesOfKind<OMPDependClause>())
9658       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9659     else
9660       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9661   };
9662 
9663   if (IfCond) {
9664     emitOMPIfClause(CGF, IfCond, TargetThenGen,
9665                     [](CodeGenFunction &CGF, PrePostActionTy &) {});
9666   } else {
9667     RegionCodeGenTy ThenRCG(TargetThenGen);
9668     ThenRCG(CGF);
9669   }
9670 }
9671 
9672 namespace {
9673 /// Kind of parameter in a function with 'declare simd' directive.
9674 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
9675 /// Attribute set of the parameter.
9676 struct ParamAttrTy {
9677   ParamKindTy Kind = Vector;
9678   llvm::APSInt StrideOrArg;
9679   llvm::APSInt Alignment;
9680 };
9681 } // namespace
9682 
9683 static unsigned evaluateCDTSize(const FunctionDecl *FD,
9684                                 ArrayRef<ParamAttrTy> ParamAttrs) {
9685   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
9686   // If the OpenMP clause "simdlen" is used, the VLEN is the value of the argument
9687   // of that clause. The VLEN value must be a power of 2.
9688   // Otherwise the notion of the function's "characteristic data type" (CDT)
9689   // is used to compute the vector length.
9690   // CDT is defined in the following order:
9691   //   a) For a non-void function, the CDT is the return type.
9692   //   b) If the function has any non-uniform, non-linear parameters, then the
9693   //      CDT is the type of the first such parameter.
9694   //   c) If the CDT determined by a) or b) above is a struct, union, or class
9695   //      type that is passed by value (except for the type that maps to the
9696   //      built-in complex data type), the characteristic data type is int.
9697   //   d) If none of the above three cases is applicable, the CDT is int.
9698   // The VLEN is then determined based on the CDT and the size of the vector
9699   // register of that ISA for which the current vector version is generated. The
9700   // VLEN is computed using the formula below:
9701   //   VLEN = sizeof(vector_register) / sizeof(CDT),
9702   // where the vector register size is specified in section 3.2.1, Registers and
9703   // the Stack Frame, of the original AMD64 ABI document.
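  // Worked example (illustrative, assuming an AVX target): for
  // 'double foo(double x)' the CDT is 'double' (64 bits), so a 256-bit
  // vector register gives VLEN = 256 / 64 = 4 lanes.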
9704   QualType RetType = FD->getReturnType();
9705   if (RetType.isNull())
9706     return 0;
9707   ASTContext &C = FD->getASTContext();
9708   QualType CDT;
9709   if (!RetType.isNull() && !RetType->isVoidType()) {
9710     CDT = RetType;
9711   } else {
9712     unsigned Offset = 0;
9713     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
9714       if (ParamAttrs[Offset].Kind == Vector)
9715         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
9716       ++Offset;
9717     }
9718     if (CDT.isNull()) {
9719       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
9720         if (ParamAttrs[I + Offset].Kind == Vector) {
9721           CDT = FD->getParamDecl(I)->getType();
9722           break;
9723         }
9724       }
9725     }
9726   }
9727   if (CDT.isNull())
9728     CDT = C.IntTy;
9729   CDT = CDT->getCanonicalTypeUnqualified();
9730   if (CDT->isRecordType() || CDT->isUnionType())
9731     CDT = C.IntTy;
9732   return C.getTypeSize(CDT);
9733 }
9734 
9735 static void
9736 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
9737                            const llvm::APSInt &VLENVal,
9738                            ArrayRef<ParamAttrTy> ParamAttrs,
9739                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
9740   struct ISADataTy {
9741     char ISA;
9742     unsigned VecRegSize;
9743   };
9744   ISADataTy ISAData[] = {
9745       {
9746           'b', 128
9747       }, // SSE
9748       {
9749           'c', 256
9750       }, // AVX
9751       {
9752           'd', 256
9753       }, // AVX2
9754       {
9755           'e', 512
9756       }, // AVX512
9757   };
9758   llvm::SmallVector<char, 2> Masked;
9759   switch (State) {
9760   case OMPDeclareSimdDeclAttr::BS_Undefined:
9761     Masked.push_back('N');
9762     Masked.push_back('M');
9763     break;
9764   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
9765     Masked.push_back('N');
9766     break;
9767   case OMPDeclareSimdDeclAttr::BS_Inbranch:
9768     Masked.push_back('M');
9769     break;
9770   }
9771   for (char Mask : Masked) {
9772     for (const ISADataTy &Data : ISAData) {
9773       SmallString<256> Buffer;
9774       llvm::raw_svector_ostream Out(Buffer);
9775       Out << "_ZGV" << Data.ISA << Mask;
9776       if (!VLENVal) {
9777         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
9778         assert(NumElts && "Non-zero simdlen/cdtsize expected");
9779         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
9780       } else {
9781         Out << VLENVal;
9782       }
9783       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
9784         switch (ParamAttr.Kind) {
9785         case LinearWithVarStride:
9786           Out << 's' << ParamAttr.StrideOrArg;
9787           break;
9788         case Linear:
9789           Out << 'l';
9790           if (!!ParamAttr.StrideOrArg)
9791             Out << ParamAttr.StrideOrArg;
9792           break;
9793         case Uniform:
9794           Out << 'u';
9795           break;
9796         case Vector:
9797           Out << 'v';
9798           break;
9799         }
9800         if (!!ParamAttr.Alignment)
9801           Out << 'a' << ParamAttr.Alignment;
9802       }
9803       Out << '_' << Fn->getName();
9804       Fn->addFnAttr(Out.str());
9805     }
9806   }
9807 }
9808 
9809 // These are the functions that are needed to mangle the name of the
9810 // vector functions generated by the compiler, according to the rules
9811 // defined in the "Vector Function ABI specifications for AArch64",
9812 // available at
9813 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
9814 
9815 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
9816 ///
9817 /// TODO: Need to implement the behavior for references marked with a
9818 /// var or no linear modifiers (1.b in the section). For this, we
9819 /// need to extend ParamKindTy to support the linear modifiers.
9820 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
9821   QT = QT.getCanonicalType();
9822 
9823   if (QT->isVoidType())
9824     return false;
9825 
9826   if (Kind == ParamKindTy::Uniform)
9827     return false;
9828 
9829   if (Kind == ParamKindTy::Linear)
9830     return false;
9831 
9832   // TODO: Handle linear references with modifiers
9833 
9834   if (Kind == ParamKindTy::LinearWithVarStride)
9835     return false;
9836 
9837   return true;
9838 }
9839 
9840 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
9841 static bool getAArch64PBV(QualType QT, ASTContext &C) {
9842   QT = QT.getCanonicalType();
9843   unsigned Size = C.getTypeSize(QT);
9844 
9845   // Only scalars and complex types at most 16 bytes wide set PBV to true.
9846   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
9847     return false;
9848 
9849   if (QT->isFloatingType())
9850     return true;
9851 
9852   if (QT->isIntegerType())
9853     return true;
9854 
9855   if (QT->isPointerType())
9856     return true;
9857 
9858   // TODO: Add support for complex types (section 3.1.2, item 2).
9859 
9860   return false;
9861 }
9862 
9863 /// Computes the lane size (LS) of a return type or of an input parameter,
9864 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
9865 /// TODO: Add support for references, section 3.2.1, item 1.
9866 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
9867   if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
9868     QualType PTy = QT.getCanonicalType()->getPointeeType();
9869     if (getAArch64PBV(PTy, C))
9870       return C.getTypeSize(PTy);
9871   }
9872   if (getAArch64PBV(QT, C))
9873     return C.getTypeSize(QT);
9874 
9875   return C.getTypeSize(C.getUIntPtrType());
9876 }
9877 
9878 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
9879 // signature of the scalar function, as defined in 3.2.2 of the
9880 // AAVFABI.
9881 static std::tuple<unsigned, unsigned, bool>
9882 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
9883   QualType RetType = FD->getReturnType().getCanonicalType();
9884 
9885   ASTContext &C = FD->getASTContext();
9886 
9887   bool OutputBecomesInput = false;
9888 
9889   llvm::SmallVector<unsigned, 8> Sizes;
9890   if (!RetType->isVoidType()) {
9891     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
9892     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
9893       OutputBecomesInput = true;
9894   }
9895   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
9896     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
9897     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
9898   }
9899 
9900   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
9901   // The LS of a function parameter / return value can only be a power
9902   // of 2, starting from 8 bits, up to 128.
9903   assert(std::all_of(Sizes.begin(), Sizes.end(),
9904                      [](unsigned Size) {
9905                        return Size == 8 || Size == 16 || Size == 32 ||
9906                               Size == 64 || Size == 128;
9907                      }) &&
9908          "Invalid size");
9909 
9910   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
9911                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
9912                          OutputBecomesInput);
9913 }
9914 
9915 /// Mangle the parameter part of the vector function name according to
9916 /// the parameters' OpenMP classification. The mangling function is defined
9917 /// in section 3.5 of the AAVFABI.
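/// For example (an illustrative sketch): parameters classified as linear
/// with step 4, uniform, and vector with alignment 16 would mangle to
/// "l4uva16".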
9918 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 9919 SmallString<256> Buffer; 9920 llvm::raw_svector_ostream Out(Buffer); 9921 for (const auto &ParamAttr : ParamAttrs) { 9922 switch (ParamAttr.Kind) { 9923 case LinearWithVarStride: 9924 Out << "ls" << ParamAttr.StrideOrArg; 9925 break; 9926 case Linear: 9927 Out << 'l'; 9928 // Don't print the step value if it is not present or if it is 9929 // equal to 1. 9930 if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1) 9931 Out << ParamAttr.StrideOrArg; 9932 break; 9933 case Uniform: 9934 Out << 'u'; 9935 break; 9936 case Vector: 9937 Out << 'v'; 9938 break; 9939 } 9940 9941 if (!!ParamAttr.Alignment) 9942 Out << 'a' << ParamAttr.Alignment; 9943 } 9944 9945 return Out.str(); 9946 } 9947 9948 // Function used to add the attribute. The parameter `VLEN` is 9949 // templated to allow the use of "x" when targeting scalable functions 9950 // for SVE. 9951 template <typename T> 9952 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 9953 char ISA, StringRef ParSeq, 9954 StringRef MangledName, bool OutputBecomesInput, 9955 llvm::Function *Fn) { 9956 SmallString<256> Buffer; 9957 llvm::raw_svector_ostream Out(Buffer); 9958 Out << Prefix << ISA << LMask << VLEN; 9959 if (OutputBecomesInput) 9960 Out << "v"; 9961 Out << ParSeq << "_" << MangledName; 9962 Fn->addFnAttr(Out.str()); 9963 } 9964 9965 // Helper function to generate the Advanced SIMD names depending on 9966 // the value of the NDS when simdlen is not present. 9967 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 9968 StringRef Prefix, char ISA, 9969 StringRef ParSeq, StringRef MangledName, 9970 bool OutputBecomesInput, 9971 llvm::Function *Fn) { 9972 switch (NDS) { 9973 case 8: 9974 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 9975 OutputBecomesInput, Fn); 9976 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 9977 OutputBecomesInput, Fn); 9978 break; 9979 case 16: 9980 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 9981 OutputBecomesInput, Fn); 9982 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 9983 OutputBecomesInput, Fn); 9984 break; 9985 case 32: 9986 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 9987 OutputBecomesInput, Fn); 9988 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 9989 OutputBecomesInput, Fn); 9990 break; 9991 case 64: 9992 case 128: 9993 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 9994 OutputBecomesInput, Fn); 9995 break; 9996 default: 9997 llvm_unreachable("Scalar type is too wide."); 9998 } 9999 } 10000 10001 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 10002 static void emitAArch64DeclareSimdFunction( 10003 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 10004 ArrayRef<ParamAttrTy> ParamAttrs, 10005 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 10006 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 10007 10008 // Get basic data for building the vector signature. 10009 const auto Data = getNDSWDS(FD, ParamAttrs); 10010 const unsigned NDS = std::get<0>(Data); 10011 const unsigned WDS = std::get<1>(Data); 10012 const bool OutputBecomesInput = std::get<2>(Data); 10013 10014 // Check the values provided via `simdlen` by the user. 10015 // 1. 
A `simdlen(1)` doesn't produce vector signatures.
10016   if (UserVLEN == 1) {
10017     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10018         DiagnosticsEngine::Warning,
10019         "The clause simdlen(1) has no effect when targeting aarch64.");
10020     CGM.getDiags().Report(SLoc, DiagID);
10021     return;
10022   }
10023 
10024   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10025   // Advanced SIMD output.
10026   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10027     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10028         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10029                                     "power of 2 when targeting Advanced SIMD.");
10030     CGM.getDiags().Report(SLoc, DiagID);
10031     return;
10032   }
10033 
10034   // 3. Section 3.4.1: SVE fixed length must obey the architectural
10035   // limits.
10036   if (ISA == 's' && UserVLEN != 0) {
10037     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10038       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10039           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10040                                       "lanes in the architectural constraints "
10041                                       "for SVE (min is 128-bit, max is "
10042                                       "2048-bit, by steps of 128-bit)");
10043       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10044       return;
10045     }
10046   }
10047 
10048   // Sort out the parameter sequence.
10049   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10050   StringRef Prefix = "_ZGV";
10051   // Generate simdlen from user input (if any).
10052   if (UserVLEN) {
10053     if (ISA == 's') {
10054       // SVE generates only a masked function.
10055       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10056                            OutputBecomesInput, Fn);
10057     } else {
10058       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10059       // Advanced SIMD generates one or two functions, depending on
10060       // the `[not]inbranch` clause.
10061       switch (State) {
10062       case OMPDeclareSimdDeclAttr::BS_Undefined:
10063         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10064                              OutputBecomesInput, Fn);
10065         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10066                              OutputBecomesInput, Fn);
10067         break;
10068       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10069         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10070                              OutputBecomesInput, Fn);
10071         break;
10072       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10073         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10074                              OutputBecomesInput, Fn);
10075         break;
10076       }
10077     }
10078   } else {
10079     // If no user simdlen is provided, follow the AAVFABI rules for
10080     // generating the vector length.
10081     if (ISA == 's') {
10082       // SVE, section 3.4.1, item 1.
10083       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10084                            OutputBecomesInput, Fn);
10085     } else {
10086       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10087       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10088       // two vector names depending on the use of the clause
10089       // `[not]inbranch`.
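      // For example (illustrative): with NDS == 32 and no '[not]inbranch'
      // clause, both the unmasked and masked 2- and 4-lane variants are
      // emitted, i.e. "_ZGVnN2<pars>_<name>", "_ZGVnN4<pars>_<name>",
      // "_ZGVnM2<pars>_<name>" and "_ZGVnM4<pars>_<name>".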
10090 switch (State) { 10091 case OMPDeclareSimdDeclAttr::BS_Undefined: 10092 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10093 OutputBecomesInput, Fn); 10094 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10095 OutputBecomesInput, Fn); 10096 break; 10097 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10098 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10099 OutputBecomesInput, Fn); 10100 break; 10101 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10102 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10103 OutputBecomesInput, Fn); 10104 break; 10105 } 10106 } 10107 } 10108 } 10109 10110 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10111 llvm::Function *Fn) { 10112 ASTContext &C = CGM.getContext(); 10113 FD = FD->getMostRecentDecl(); 10114 // Map params to their positions in function decl. 10115 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10116 if (isa<CXXMethodDecl>(FD)) 10117 ParamPositions.try_emplace(FD, 0); 10118 unsigned ParamPos = ParamPositions.size(); 10119 for (const ParmVarDecl *P : FD->parameters()) { 10120 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10121 ++ParamPos; 10122 } 10123 while (FD) { 10124 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10125 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10126 // Mark uniform parameters. 10127 for (const Expr *E : Attr->uniforms()) { 10128 E = E->IgnoreParenImpCasts(); 10129 unsigned Pos; 10130 if (isa<CXXThisExpr>(E)) { 10131 Pos = ParamPositions[FD]; 10132 } else { 10133 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10134 ->getCanonicalDecl(); 10135 Pos = ParamPositions[PVD]; 10136 } 10137 ParamAttrs[Pos].Kind = Uniform; 10138 } 10139 // Get alignment info. 10140 auto NI = Attr->alignments_begin(); 10141 for (const Expr *E : Attr->aligneds()) { 10142 E = E->IgnoreParenImpCasts(); 10143 unsigned Pos; 10144 QualType ParmTy; 10145 if (isa<CXXThisExpr>(E)) { 10146 Pos = ParamPositions[FD]; 10147 ParmTy = E->getType(); 10148 } else { 10149 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10150 ->getCanonicalDecl(); 10151 Pos = ParamPositions[PVD]; 10152 ParmTy = PVD->getType(); 10153 } 10154 ParamAttrs[Pos].Alignment = 10155 (*NI) 10156 ? (*NI)->EvaluateKnownConstInt(C) 10157 : llvm::APSInt::getUnsigned( 10158 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10159 .getQuantity()); 10160 ++NI; 10161 } 10162 // Mark linear parameters. 
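// For example, assuming a hypothetical `linear(p : 2)` clause, the step
// evaluates to the constant 2 and the parameter mangles as "l2" (see
// mangleVectorParameters above). If the step instead names another
// parameter, e.g. `linear(p : s)` with `uniform(s)`, the kind becomes
// LinearWithVarStride and StrideOrArg records the position of `s`, so the
// parameter mangles as "ls<pos>".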
10163 auto SI = Attr->steps_begin();
10164 auto MI = Attr->modifiers_begin();
10165 for (const Expr *E : Attr->linears()) {
10166 E = E->IgnoreParenImpCasts();
10167 unsigned Pos;
10168 if (isa<CXXThisExpr>(E)) {
10169 Pos = ParamPositions[FD];
10170 } else {
10171 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10172 ->getCanonicalDecl();
10173 Pos = ParamPositions[PVD];
10174 }
10175 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10176 ParamAttr.Kind = Linear;
10177 if (*SI) {
10178 Expr::EvalResult Result;
10179 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10180 if (const auto *DRE =
10181 dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10182 if (const auto *StridePVD = dyn_cast<ParmVarDecl>(DRE->getDecl())) {
10183 ParamAttr.Kind = LinearWithVarStride;
10184 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10185 ParamPositions[StridePVD->getCanonicalDecl()]);
10186 }
10187 }
10188 } else {
10189 ParamAttr.StrideOrArg = Result.Val.getInt();
10190 }
10191 }
10192 ++SI;
10193 ++MI;
10194 }
10195 llvm::APSInt VLENVal;
10196 SourceLocation ExprLoc;
10197 const Expr *VLENExpr = Attr->getSimdlen();
10198 if (VLENExpr) {
10199 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10200 ExprLoc = VLENExpr->getExprLoc();
10201 }
10202 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10203 if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
10204 CGM.getTriple().getArch() == llvm::Triple::x86_64) {
10205 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10206 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10207 unsigned VLEN = VLENVal.getExtValue();
10208 StringRef MangledName = Fn->getName();
10209 if (CGM.getTarget().hasFeature("sve"))
10210 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10211 MangledName, 's', 128, Fn, ExprLoc);
10212 if (CGM.getTarget().hasFeature("neon"))
10213 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10214 MangledName, 'n', 128, Fn, ExprLoc);
10215 }
10216 }
10217 FD = FD->getPreviousDecl();
10218 }
10219 }
10220
10221 namespace {
10222 /// Cleanup action for doacross support.
10223 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10224 public:
10225 static const int DoacrossFinArgs = 2;
10226
10227 private:
10228 llvm::FunctionCallee RTLFn;
10229 llvm::Value *Args[DoacrossFinArgs];
10230
10231 public:
10232 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10233 ArrayRef<llvm::Value *> CallArgs)
10234 : RTLFn(RTLFn) {
10235 assert(CallArgs.size() == DoacrossFinArgs && "Size of arguments does not match.");
10236 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10237 }
10238 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10239 if (!CGF.HaveInsertPoint())
10240 return;
10241 CGF.EmitRuntimeCall(RTLFn, Args);
10242 }
10243 };
10244 } // namespace
10245
10246 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
10247 const OMPLoopDirective &D,
10248 ArrayRef<Expr *> NumIterations) {
10249 if (!CGF.HaveInsertPoint())
10250 return;
10251
10252 ASTContext &C = CGM.getContext();
10253 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
10254 RecordDecl *RD;
10255 if (KmpDimTy.isNull()) {
10256 // Build struct kmp_dim { // loop bounds info cast to kmp_int64
10257 // kmp_int64 lo; // lower
10258 // kmp_int64 up; // upper
10259 // kmp_int64 st; // stride
10260 // };
10261 RD = C.buildImplicitRecord("kmp_dim");
10262 RD->startDefinition();
10263 addFieldToRecordDecl(C, RD, Int64Ty);
10264 addFieldToRecordDecl(C, RD, Int64Ty);
10265 addFieldToRecordDecl(C, RD, Int64Ty);
10266 RD->completeDefinition();
10267 KmpDimTy = C.getRecordType(RD);
10268 } else {
10269 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
10270 }
10271 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
10272 QualType ArrayTy =
10273 C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);
10274
10275 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
10276 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
10277 enum { LowerFD = 0, UpperFD, StrideFD };
10278 // Fill dims with data.
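// For example, for a hypothetical `#pragma omp for ordered(2)` with trip
// counts N0 and N1, the loop below produces:
//   dims[0] = {lo: 0, up: N0, st: 1}
//   dims[1] = {lo: 0, up: N1, st: 1}
// (`lo` remains 0 from the null initialization above).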
10279 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 10280 LValue DimsLVal = CGF.MakeAddrLValue( 10281 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 10282 // dims.upper = num_iterations; 10283 LValue UpperLVal = CGF.EmitLValueForField( 10284 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 10285 llvm::Value *NumIterVal = 10286 CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]), 10287 D.getNumIterations()->getType(), Int64Ty, 10288 D.getNumIterations()->getExprLoc()); 10289 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 10290 // dims.stride = 1; 10291 LValue StrideLVal = CGF.EmitLValueForField( 10292 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 10293 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 10294 StrideLVal); 10295 } 10296 10297 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 10298 // kmp_int32 num_dims, struct kmp_dim * dims); 10299 llvm::Value *Args[] = { 10300 emitUpdateLocation(CGF, D.getBeginLoc()), 10301 getThreadID(CGF, D.getBeginLoc()), 10302 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 10303 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 10304 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 10305 CGM.VoidPtrTy)}; 10306 10307 llvm::FunctionCallee RTLFn = 10308 createRuntimeFunction(OMPRTL__kmpc_doacross_init); 10309 CGF.EmitRuntimeCall(RTLFn, Args); 10310 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 10311 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 10312 llvm::FunctionCallee FiniRTLFn = 10313 createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 10314 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 10315 llvm::makeArrayRef(FiniArgs)); 10316 } 10317 10318 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 10319 const OMPDependClause *C) { 10320 QualType Int64Ty = 10321 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 10322 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 10323 QualType ArrayTy = CGM.getContext().getConstantArrayType( 10324 Int64Ty, Size, ArrayType::Normal, 0); 10325 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 10326 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 10327 const Expr *CounterVal = C->getLoopData(I); 10328 assert(CounterVal); 10329 llvm::Value *CntVal = CGF.EmitScalarConversion( 10330 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 10331 CounterVal->getExprLoc()); 10332 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 10333 /*Volatile=*/false, Int64Ty); 10334 } 10335 llvm::Value *Args[] = { 10336 emitUpdateLocation(CGF, C->getBeginLoc()), 10337 getThreadID(CGF, C->getBeginLoc()), 10338 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 10339 llvm::FunctionCallee RTLFn; 10340 if (C->getDependencyKind() == OMPC_DEPEND_source) { 10341 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 10342 } else { 10343 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 10344 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 10345 } 10346 CGF.EmitRuntimeCall(RTLFn, Args); 10347 } 10348 10349 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 10350 llvm::FunctionCallee Callee, 10351 ArrayRef<llvm::Value *> Args) const { 10352 assert(Loc.isValid() && "Outlined function call location must be valid."); 10353 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 10354 10355 if (auto *Fn = 
dyn_cast<llvm::Function>(Callee.getCallee())) {
10356 if (Fn->doesNotThrow()) {
10357 CGF.EmitNounwindRuntimeCall(Fn, Args);
10358 return;
10359 }
10360 }
10361 CGF.EmitRuntimeCall(Callee, Args);
10362 }
10363
10364 void CGOpenMPRuntime::emitOutlinedFunctionCall(
10365 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
10366 ArrayRef<llvm::Value *> Args) const {
10367 emitCall(CGF, Loc, OutlinedFn, Args);
10368 }
10369
10370 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
10371 if (const auto *FD = dyn_cast<FunctionDecl>(D))
10372 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
10373 HasEmittedDeclareTargetRegion = true;
10374 }
10375
10376 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
10377 const VarDecl *NativeParam,
10378 const VarDecl *TargetParam) const {
10379 return CGF.GetAddrOfLocalVar(NativeParam);
10380 }
10381
10382 namespace {
10383 /// Cleanup action for allocate support.
10384 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
10385 public:
10386 static const int CleanupArgs = 3;
10387
10388 private:
10389 llvm::FunctionCallee RTLFn;
10390 llvm::Value *Args[CleanupArgs];
10391
10392 public:
10393 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
10394 ArrayRef<llvm::Value *> CallArgs)
10395 : RTLFn(RTLFn) {
10396 assert(CallArgs.size() == CleanupArgs &&
10397 "Size of arguments does not match.");
10398 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10399 }
10400 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10401 if (!CGF.HaveInsertPoint())
10402 return;
10403 CGF.EmitRuntimeCall(RTLFn, Args);
10404 }
10405 };
10406 } // namespace
10407
10408 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
10409 const VarDecl *VD) {
10410 if (!VD)
10411 return Address::invalid();
10412 const VarDecl *CVD = VD->getCanonicalDecl();
10413 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
10414 return Address::invalid();
10415 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
10416 // Use the default allocation.
10417 if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
10418 !AA->getAllocator())
10419 return Address::invalid();
10420 llvm::Value *Size;
10421 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
10422 if (CVD->getType()->isVariablyModifiedType()) {
10423 Size = CGF.getTypeSize(CVD->getType());
10424 // Align the size: ((size + align - 1) / align) * align
10425 Size = CGF.Builder.CreateNUWAdd(
10426 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
10427 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
10428 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
10429 } else {
10430 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
10431 Size = CGM.getSize(Sz.alignTo(Align));
10432 }
10433 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
10434 assert(AA->getAllocator() &&
10435 "Expected allocator expression for non-default allocator.");
10436 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
10437 // According to the standard, the original allocator type is an enum
10438 // (integer). Convert it to a pointer type, if required.
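// Sketch of the expected lowering (assuming a hypothetical variable `a`):
//   int a;
//   #pragma omp allocate(a) allocator(omp_large_cap_mem_alloc)
// becomes, roughly,
//   %ptr = call i8* @__kmpc_alloc(i32 %gtid, i64 4, i8* %allocator)
// with a matching call to @__kmpc_free(%gtid, %ptr, %allocator) registered
// as a normal/EH cleanup below.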
10439 if (Allocator->getType()->isIntegerTy()) 10440 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 10441 else if (Allocator->getType()->isPointerTy()) 10442 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 10443 CGM.VoidPtrTy); 10444 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 10445 10446 llvm::Value *Addr = 10447 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args, 10448 CVD->getName() + ".void.addr"); 10449 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 10450 Allocator}; 10451 llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free); 10452 10453 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 10454 llvm::makeArrayRef(FiniArgs)); 10455 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 10456 Addr, 10457 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 10458 CVD->getName() + ".addr"); 10459 return Address(Addr, Align); 10460 } 10461 10462 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 10463 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 10464 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 10465 llvm_unreachable("Not supported in SIMD-only mode"); 10466 } 10467 10468 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 10469 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 10470 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 10471 llvm_unreachable("Not supported in SIMD-only mode"); 10472 } 10473 10474 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 10475 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 10476 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 10477 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 10478 bool Tied, unsigned &NumberOfParts) { 10479 llvm_unreachable("Not supported in SIMD-only mode"); 10480 } 10481 10482 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 10483 SourceLocation Loc, 10484 llvm::Function *OutlinedFn, 10485 ArrayRef<llvm::Value *> CapturedVars, 10486 const Expr *IfCond) { 10487 llvm_unreachable("Not supported in SIMD-only mode"); 10488 } 10489 10490 void CGOpenMPSIMDRuntime::emitCriticalRegion( 10491 CodeGenFunction &CGF, StringRef CriticalName, 10492 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 10493 const Expr *Hint) { 10494 llvm_unreachable("Not supported in SIMD-only mode"); 10495 } 10496 10497 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 10498 const RegionCodeGenTy &MasterOpGen, 10499 SourceLocation Loc) { 10500 llvm_unreachable("Not supported in SIMD-only mode"); 10501 } 10502 10503 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 10504 SourceLocation Loc) { 10505 llvm_unreachable("Not supported in SIMD-only mode"); 10506 } 10507 10508 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 10509 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 10510 SourceLocation Loc) { 10511 llvm_unreachable("Not supported in SIMD-only mode"); 10512 } 10513 10514 void CGOpenMPSIMDRuntime::emitSingleRegion( 10515 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 10516 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 10517 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 10518 ArrayRef<const Expr *> AssignmentOps) { 10519 llvm_unreachable("Not supported in SIMD-only mode"); 10520 } 10521 10522 void 
CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 10523 const RegionCodeGenTy &OrderedOpGen, 10524 SourceLocation Loc, 10525 bool IsThreads) { 10526 llvm_unreachable("Not supported in SIMD-only mode"); 10527 } 10528 10529 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 10530 SourceLocation Loc, 10531 OpenMPDirectiveKind Kind, 10532 bool EmitChecks, 10533 bool ForceSimpleCall) { 10534 llvm_unreachable("Not supported in SIMD-only mode"); 10535 } 10536 10537 void CGOpenMPSIMDRuntime::emitForDispatchInit( 10538 CodeGenFunction &CGF, SourceLocation Loc, 10539 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 10540 bool Ordered, const DispatchRTInput &DispatchValues) { 10541 llvm_unreachable("Not supported in SIMD-only mode"); 10542 } 10543 10544 void CGOpenMPSIMDRuntime::emitForStaticInit( 10545 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 10546 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 10547 llvm_unreachable("Not supported in SIMD-only mode"); 10548 } 10549 10550 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 10551 CodeGenFunction &CGF, SourceLocation Loc, 10552 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 10553 llvm_unreachable("Not supported in SIMD-only mode"); 10554 } 10555 10556 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 10557 SourceLocation Loc, 10558 unsigned IVSize, 10559 bool IVSigned) { 10560 llvm_unreachable("Not supported in SIMD-only mode"); 10561 } 10562 10563 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 10564 SourceLocation Loc, 10565 OpenMPDirectiveKind DKind) { 10566 llvm_unreachable("Not supported in SIMD-only mode"); 10567 } 10568 10569 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 10570 SourceLocation Loc, 10571 unsigned IVSize, bool IVSigned, 10572 Address IL, Address LB, 10573 Address UB, Address ST) { 10574 llvm_unreachable("Not supported in SIMD-only mode"); 10575 } 10576 10577 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 10578 llvm::Value *NumThreads, 10579 SourceLocation Loc) { 10580 llvm_unreachable("Not supported in SIMD-only mode"); 10581 } 10582 10583 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 10584 OpenMPProcBindClauseKind ProcBind, 10585 SourceLocation Loc) { 10586 llvm_unreachable("Not supported in SIMD-only mode"); 10587 } 10588 10589 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 10590 const VarDecl *VD, 10591 Address VDAddr, 10592 SourceLocation Loc) { 10593 llvm_unreachable("Not supported in SIMD-only mode"); 10594 } 10595 10596 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 10597 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 10598 CodeGenFunction *CGF) { 10599 llvm_unreachable("Not supported in SIMD-only mode"); 10600 } 10601 10602 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 10603 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 10604 llvm_unreachable("Not supported in SIMD-only mode"); 10605 } 10606 10607 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 10608 ArrayRef<const Expr *> Vars, 10609 SourceLocation Loc) { 10610 llvm_unreachable("Not supported in SIMD-only mode"); 10611 } 10612 10613 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 10614 const OMPExecutableDirective &D, 10615 llvm::Function *TaskFunction, 10616 QualType SharedsTy, Address Shareds, 10617 
const Expr *IfCond, 10618 const OMPTaskDataTy &Data) { 10619 llvm_unreachable("Not supported in SIMD-only mode"); 10620 } 10621 10622 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 10623 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 10624 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 10625 const Expr *IfCond, const OMPTaskDataTy &Data) { 10626 llvm_unreachable("Not supported in SIMD-only mode"); 10627 } 10628 10629 void CGOpenMPSIMDRuntime::emitReduction( 10630 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 10631 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 10632 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 10633 assert(Options.SimpleReduction && "Only simple reduction is expected."); 10634 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 10635 ReductionOps, Options); 10636 } 10637 10638 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 10639 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 10640 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 10641 llvm_unreachable("Not supported in SIMD-only mode"); 10642 } 10643 10644 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 10645 SourceLocation Loc, 10646 ReductionCodeGen &RCG, 10647 unsigned N) { 10648 llvm_unreachable("Not supported in SIMD-only mode"); 10649 } 10650 10651 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 10652 SourceLocation Loc, 10653 llvm::Value *ReductionsPtr, 10654 LValue SharedLVal) { 10655 llvm_unreachable("Not supported in SIMD-only mode"); 10656 } 10657 10658 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 10659 SourceLocation Loc) { 10660 llvm_unreachable("Not supported in SIMD-only mode"); 10661 } 10662 10663 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 10664 CodeGenFunction &CGF, SourceLocation Loc, 10665 OpenMPDirectiveKind CancelRegion) { 10666 llvm_unreachable("Not supported in SIMD-only mode"); 10667 } 10668 10669 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 10670 SourceLocation Loc, const Expr *IfCond, 10671 OpenMPDirectiveKind CancelRegion) { 10672 llvm_unreachable("Not supported in SIMD-only mode"); 10673 } 10674 10675 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 10676 const OMPExecutableDirective &D, StringRef ParentName, 10677 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 10678 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 10679 llvm_unreachable("Not supported in SIMD-only mode"); 10680 } 10681 10682 void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF, 10683 const OMPExecutableDirective &D, 10684 llvm::Function *OutlinedFn, 10685 llvm::Value *OutlinedFnID, 10686 const Expr *IfCond, 10687 const Expr *Device) { 10688 llvm_unreachable("Not supported in SIMD-only mode"); 10689 } 10690 10691 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 10692 llvm_unreachable("Not supported in SIMD-only mode"); 10693 } 10694 10695 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 10696 llvm_unreachable("Not supported in SIMD-only mode"); 10697 } 10698 10699 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 10700 return false; 10701 } 10702 10703 llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() { 10704 return nullptr; 10705 } 10706 10707 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 10708 const OMPExecutableDirective &D, 10709 SourceLocation Loc, 
10710 llvm::Function *OutlinedFn, 10711 ArrayRef<llvm::Value *> CapturedVars) { 10712 llvm_unreachable("Not supported in SIMD-only mode"); 10713 } 10714 10715 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 10716 const Expr *NumTeams, 10717 const Expr *ThreadLimit, 10718 SourceLocation Loc) { 10719 llvm_unreachable("Not supported in SIMD-only mode"); 10720 } 10721 10722 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 10723 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10724 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 10725 llvm_unreachable("Not supported in SIMD-only mode"); 10726 } 10727 10728 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 10729 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10730 const Expr *Device) { 10731 llvm_unreachable("Not supported in SIMD-only mode"); 10732 } 10733 10734 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 10735 const OMPLoopDirective &D, 10736 ArrayRef<Expr *> NumIterations) { 10737 llvm_unreachable("Not supported in SIMD-only mode"); 10738 } 10739 10740 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 10741 const OMPDependClause *C) { 10742 llvm_unreachable("Not supported in SIMD-only mode"); 10743 } 10744 10745 const VarDecl * 10746 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 10747 const VarDecl *NativeParam) const { 10748 llvm_unreachable("Not supported in SIMD-only mode"); 10749 } 10750 10751 Address 10752 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 10753 const VarDecl *NativeParam, 10754 const VarDecl *TargetParam) const { 10755 llvm_unreachable("Not supported in SIMD-only mode"); 10756 } 10757