//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/Basic/BitmaskEnum.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions backed by a captured statement (outlined
  /// regions).
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions without their own captured statement (used by
  /// the inlined-region subclass below).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-switching point for untied tasks. No-op by default;
  /// overridden by the task-outlined region below.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the region was created with cancellation support requested.
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of this region (parallel/task/inlined/target).
  CGOpenMPRegionKind RegionKind;
  /// Callback that emits the code for the region body.
  RegionCodeGenTy CodeGen;
  /// OpenMP directive that produced this region.
  OpenMPDirectiveKind Kind;
  /// Exposed via hasCancel().
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the outlined helper function.
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the task-switching machinery required for
  /// untied tasks: a switch over the task part id, with one case per
  /// re-entry point added via emitUntiedSwitch().
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        // Case 0 is the initial entry into the task body.
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        // Store the next part id, run the untied-task callback, then add a
        // new re-entry case that resumes just past this point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of switch cases added so far (one per task part).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to a region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique name of the target region, provided by the client.
  StringRef HelperName;
};

/// Placeholder codegen callback for regions that must never emit a body.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs. Installs an inlined region
/// info on the CodeGenFunction and stashes/clears lambda-capture and block
/// state for the duration of emission; the destructor restores everything.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as the generic
  /// implicit-barrier flag, mirroring kmp.h).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**<  source[4] in Fortran, do not use for
///                                  C++  */
///    char const *psource;    /**<  String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file, the
///                            function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// Identifiers for the libomp / libomptarget entry points this runtime
/// class can emit calls to; each comment gives the C prototype of the call.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,

  //
  // Offloading related calls
  //
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
721 class CleanupTy final : public EHScopeStack::Cleanup { 722 PrePostActionTy *Action; 723 724 public: 725 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 726 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 727 if (!CGF.HaveInsertPoint()) 728 return; 729 Action->Exit(CGF); 730 } 731 }; 732 733 } // anonymous namespace 734 735 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 736 CodeGenFunction::RunCleanupsScope Scope(CGF); 737 if (PrePostAction) { 738 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 739 Callback(CodeGen, CGF, *PrePostAction); 740 } else { 741 PrePostActionTy Action; 742 Callback(CodeGen, CGF, Action); 743 } 744 } 745 746 /// Check if the combiner is a call to UDR combiner and if it is so return the 747 /// UDR decl used for reduction. 748 static const OMPDeclareReductionDecl * 749 getReductionInit(const Expr *ReductionOp) { 750 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 751 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 752 if (const auto *DRE = 753 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 754 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 755 return DRD; 756 return nullptr; 757 } 758 759 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 760 const OMPDeclareReductionDecl *DRD, 761 const Expr *InitOp, 762 Address Private, Address Original, 763 QualType Ty) { 764 if (DRD->getInitializer()) { 765 std::pair<llvm::Function *, llvm::Function *> Reduction = 766 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 767 const auto *CE = cast<CallExpr>(InitOp); 768 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 769 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 770 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 771 const auto *LHSDRE = 772 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 773 const auto *RHSDRE = 774 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 775 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 776 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 777 [=]() { return Private; }); 778 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 779 [=]() { return Original; }); 780 (void)PrivateScope.Privatize(); 781 RValue Func = RValue::get(Reduction.second); 782 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 783 CGF.EmitIgnoredExpr(InitOp); 784 } else { 785 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 786 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 787 auto *GV = new llvm::GlobalVariable( 788 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 789 llvm::GlobalValue::PrivateLinkage, Init, Name); 790 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 791 RValue InitRVal; 792 switch (CGF.getEvaluationKind(Ty)) { 793 case TEK_Scalar: 794 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 795 break; 796 case TEK_Complex: 797 InitRVal = 798 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 799 break; 800 case TEK_Aggregate: 801 InitRVal = RValue::getAggregate(LV.getAddress()); 802 break; 803 } 804 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 805 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 806 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 807 /*IsInitializer=*/false); 808 } 809 } 810 811 /// Emit initialization of arrays of complex types. 812 /// \param DestAddr Address of the array. 813 /// \param Type Type of array. 814 /// \param Init Initial expression of array. 815 /// \param SrcAddr Address of the original array. 816 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 817 QualType Type, bool EmitDeclareReductionInit, 818 const Expr *Init, 819 const OMPDeclareReductionDecl *DRD, 820 Address SrcAddr = Address::invalid()) { 821 // Perform element-by-element initialization. 
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // For UDRs a parallel PHI walks the source array alongside the dest.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Cleanups for per-element temporaries must run inside the loop body.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR name below says "dest" although this GEP advances
    // the *source* pointer — cosmetic only, but worth renaming.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit the lvalue of the shared (original) variable of a reduction clause.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

/// Emit the upper-bound lvalue of an array section; returns an empty LValue
/// for anything that is not an OMPArraySectionExpr.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

/// Initialize the private copy of an array-typed reduction item, either via
/// the UDR initializer or via the private variable's own initializer.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the declare-reduction path when a UDR is involved and either has an
  // explicit initializer or the private decl has no initializer of its own.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ?
                           ClausesData[N].ReductionOp
                           : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress());
}

/// Collect the per-clause (shared, private, reduction-op) triples and
/// pre-reserve the side tables sized to the number of reduction items.
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  // The three lists are parallel; walk them in lockstep.
  auto IPriv = Privates.begin();
  auto IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IPriv, *IRed);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

/// Emit (and cache) the begin/end lvalues of the N-th shared item. Must be
/// called with strictly increasing N.
void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
  SharedAddresses.emplace_back(First, Second);
}

/// Compute and record the size (in chars and in elements) of the N-th
/// reduction item; for variably-modified types, also map the VLA size
/// expression so later codegen of the type can see it.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: only the char size is needed.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (ub - lb) + 1, derived from the cached begin/end
    // lvalues of the array section.
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
                                     SharedAddresses[N].first.getPointer());
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars
        = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole-variable VLA: char size comes from the type, element count by
    // exact division.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count so that
  // EmitVariablyModifiedType can evaluate the private type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Re-emit the variably-modified private type using an externally supplied
/// element count \p Size (e.g. one loaded from a task/reduction descriptor).
/// No-op for non-VLA types, where \p Size must be null.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Initialize the private copy of the N-th reduction item. Dispatches, in
/// order, to: aggregate (array) initialization, UDR initializer, or the
/// private decl's own non-trivial initializer when \p DefaultInit declines.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Retype both addresses to the memory representation of their decls.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Caller's default-init hook declined; fall back to the private decl's
    // own initializer expression.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Return true if the N-th private copy has a non-trivial destructor and
/// therefore needs a cleanup emitted.
bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

/// Push a destructor cleanup for the N-th private copy when its type
/// requires destruction.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Chase pointer/reference indirections from \p BaseTy down to \p ElTy,
/// loading through each level, and return an lvalue retyped to ElTy's
/// memory representation.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy =
            BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Inverse companion of loadToBegin: rebuild a chain of temporaries so that
/// \p Addr (pointing at ElTy data) can be handed back to code expecting the
/// original (possibly multi-level pointer/reference) \p BaseTy shape.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  // One temporary per level of indirection; each stores the address of the
  // next-inner temporary.
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted address into the innermost temporary and return
    // the outermost one.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

/// Strip array-section and subscript layers off \p Ref and return the base
/// VarDecl (also returning the base DeclRefExpr through \p DE), or null when
/// \p Ref is not a section/subscript expression.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base =
          TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

/// For reduction items that are array sections/subscripts, shift the private
/// address by the offset of the section start within the original base
/// variable, so the private copy can be addressed through the same base
/// expression shape; records the base decl either way.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset (in elements) of the shared section start from the base.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress().getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress().getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

/// True if the N-th reduction item uses a UDR with an explicit initializer.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  // Thread-id variable is passed as kmp_int32 *; load through the pointer.
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // Enforced by bracketing the body in a terminate scope.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  // For tasks the thread id is a kmp_int32 value, not a pointer: no load
  // through indirection is needed.
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Append a public, non-bitfield field of type \p FieldTy to record \p DC.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

/// Build the implicit ident_t record used by all libomp entry points
/// ({i32 reserved_1, i32 flags, i32 reserved_2, i32 reserved_3,
/// i8* psource}), then load any offloading metadata from the module.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  // kmp_critical_name is an array of 8 i32s.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Only drop globals that remained unused pure declarations.
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

/// Join \p Parts into a runtime-internal name: the first part is prefixed
/// with FirstSeparator, subsequent parts with Separator.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return OS.str();
}

/// Emit the outlined combiner or initializer function of a user-defined
/// reduction: an always-inline internal function of shape
/// void fn(Ty *restrict in, Ty *restrict out) that evaluates
/// \p CombinerInitializer with \p In / \p Out remapped onto the parameters.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy =
      CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Force inlining: strip any no-inline/optnone set by default attributes.
  Fn->removeFnAttr(llvm::Attribute::NoInline);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // For initializers, run omp_priv's own initializer (if any) first.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit (once) the combiner and optional initializer functions for UDR \p D,
/// record them in UDRMap, and associate them with the current function for
/// later cleanup when \p CGF is provided.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only call-style initializers pass the expression through; direct-init
    // is handled inside emitCombinerOrInitializer via omp_priv's own init.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Return the cached (combiner, initializer) pair for \p D, emitting it on
/// demand if it has not been generated yet.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

/// Outline the captured statement of a parallel/teams directive into a
/// helper function, propagating whether the region (for any of the
/// cancellable directive kinds) contains a 'cancel' construct.
static llvm::Value *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto
               *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}

/// Outline the 'parallel' captured region of \p D into a helper function.
llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Outline the 'teams' captured region of \p D into a helper function.
llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Outline a task/taskloop region. For untied tasks, installs an action that
/// re-enqueues the task via __kmpc_omp_task at each untied switch point and
/// reports the resulting number of task parts through \p NumberOfParts.
llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc,
        ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only plain 'task' carries a cancel flag here.
  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Value *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

/// Fill \p Fields with \p Data mapped onto the LLVM layout of record \p RD,
/// inserting null values for padding slots between mapped fields.
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  // NOTE(review): CIBuilder is never used in this function — candidate for
  // removal.
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill the alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}

/// Create a global variable holding a constant struct of type \p Ty built
/// from \p Data; extra arguments are forwarded to finishAndCreateGlobal
/// (e.g. linkage).
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&...
                       Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

/// Build a constant struct of type \p Ty from \p Data and append it to an
/// already-open aggregate builder \p Parent instead of creating a global.
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

/// Return (creating and caching on first use, keyed by \p Flags) the default
/// ident_t global used when no debug location is required.
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // ident_t = {reserved_1=0, flags, reserved_2=0, reserved_3=0, psource}.
    llvm::Constant *Data[] = {llvm::ConstantInt::getNullValue(CGM.Int32Ty),
                              llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                              llvm::ConstantInt::getNullValue(CGM.Int32Ty),
                              llvm::ConstantInt::getNullValue(CGM.Int32Ty),
                              DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, /*IsConstant=*/false, Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}

/// Create the per-function "service" insertion point — a dummy bitcast
/// instruction before which runtime setup calls are emitted — either at the
/// current insertion block or right after the alloca insertion point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

/// Erase the dummy service insertion point, if one was created.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction
                                                     &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Seed the local ident_t from the default-location template; done at
    // the service insertion point so it precedes all uses in the function.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Cache one ";file;function;line;column;;" string per source location.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";"
        << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}

/// Return the OpenMP global thread id for the current function, preferring
/// (in order): the per-function cache, the thread-id argument of an
/// enclosing outlined region, and finally a __kmpc_global_thread_num call
/// emitted at the service insertion point and cached.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
      !CGF.getLangOpts().CXXExceptions ||
      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    if (auto *OMPRegionInfo =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
      if (OMPRegionInfo->getThreadIDVariable()) {
        // Check if this an outlined function with thread id passed as argument.
        LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
1610 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 1611 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1612 Elem.second.ThreadID = ThreadID; 1613 } 1614 return ThreadID; 1615 } 1616 } 1617 } 1618 1619 // This is not an outlined function region - need to call __kmpc_int32 1620 // kmpc_global_thread_num(ident_t *loc). 1621 // Generate thread id value and cache this value for use across the 1622 // function. 1623 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1624 if (!Elem.second.ServiceInsertPt) 1625 setLocThreadIdInsertPt(CGF); 1626 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1627 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1628 llvm::CallInst *Call = CGF.Builder.CreateCall( 1629 createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1630 emitUpdateLocation(CGF, Loc)); 1631 Call->setCallingConv(CGF.getRuntimeCC()); 1632 Elem.second.ThreadID = Call; 1633 return Call; 1634 } 1635 1636 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1637 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1638 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1639 clearLocThreadIdInsertPt(CGF); 1640 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1641 } 1642 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1643 for(auto *D : FunctionUDRMap[CGF.CurFn]) 1644 UDRMap.erase(D); 1645 FunctionUDRMap.erase(CGF.CurFn); 1646 } 1647 } 1648 1649 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1650 return IdentTy->getPointerTo(); 1651 } 1652 1653 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1654 if (!Kmpc_MicroTy) { 1655 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                             llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

// Returns a declaration of the requested OpenMP runtime library entry point,
// creating it on first use. Only the function type and name are emitted here;
// the definitions live in the OpenMP runtime (libomp) and, for the __tgt_*
// entries, the offloading runtime (libomptarget).
llvm::Constant *
CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
  llvm::Constant *RTLFn = nullptr;
  switch (static_cast<OpenMPRTLFunction>(Function)) {
  case OMPRTL__kmpc_fork_call: {
    // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
    // microtask, ...);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                getKmpc_MicroPointerTy()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
    break;
  }
  case OMPRTL__kmpc_global_thread_num: {
    // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
    break;
  }
  case OMPRTL__kmpc_threadprivate_cached: {
    // Build void *__kmpc_threadprivate_cached(ident_t *loc,
    // kmp_int32 global_tid, void *data, size_t size, void ***cache);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy, CGM.SizeTy,
                                CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
    break;
  }
  case OMPRTL__kmpc_critical: {
    // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
    break;
  }
  case OMPRTL__kmpc_critical_with_hint: {
    // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit, uintptr_t hint);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                llvm::PointerType::getUnqual(KmpCriticalNameTy),
                                CGM.IntPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
    break;
  }
  case OMPRTL__kmpc_threadprivate_register: {
    // Build void __kmpc_threadprivate_register(ident_t *, void *data,
    // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
    // typedef void *(*kmpc_ctor)(void *);
    auto *KmpcCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                /*isVarArg*/ false)->getPointerTo();
    // typedef void *(*kmpc_cctor)(void *, void *);
    llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *KmpcCopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
                                /*isVarArg*/ false)
            ->getPointerTo();
    // typedef void (*kmpc_dtor)(void *);
    auto *KmpcDtorTy =
        llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
            ->getPointerTo();
    llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
                              KmpcCopyCtorTy, KmpcDtorTy};
    auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
                                         /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
    break;
  }
  case OMPRTL__kmpc_end_critical: {
    // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
    break;
  }
  case OMPRTL__kmpc_cancel_barrier: {
    // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
    break;
  }
  case OMPRTL__kmpc_barrier: {
    // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
    break;
  }
  case OMPRTL__kmpc_for_static_fini: {
    // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
    break;
  }
  case OMPRTL__kmpc_push_num_threads: {
    // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_threads)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
    break;
  }
  case OMPRTL__kmpc_serialized_parallel: {
    // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
    break;
  }
  case OMPRTL__kmpc_end_serialized_parallel: {
    // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
    break;
  }
  case OMPRTL__kmpc_flush: {
    // Build void __kmpc_flush(ident_t *loc);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
    break;
  }
  case OMPRTL__kmpc_master: {
    // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
    break;
  }
  case OMPRTL__kmpc_end_master: {
    // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
    break;
  }
  case OMPRTL__kmpc_omp_taskyield: {
    // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
    // int end_part);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
    break;
  }
  case OMPRTL__kmpc_single: {
    // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
    break;
  }
  case OMPRTL__kmpc_end_single: {
    // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
    break;
  }
  case OMPRTL__kmpc_omp_task_alloc: {
    // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
    // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
    // kmp_routine_entry_t *task_entry);
    assert(KmpRoutineEntryPtrTy != nullptr &&
           "Type kmp_routine_entry_t must be created.");
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
                                CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
    // Return void * and then cast to particular kmp_task_t type.
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
    break;
  }
  case OMPRTL__kmpc_omp_task: {
    // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
    // *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
    break;
  }
  case OMPRTL__kmpc_copyprivate: {
    // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
    // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
    // kmp_int32 didit);
    llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CpyFnTy =
        llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
                                CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
    break;
  }
  case OMPRTL__kmpc_reduce: {
    // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
    // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
                                               /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
    break;
  }
  case OMPRTL__kmpc_reduce_nowait: {
    // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
    // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
    // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
    // *lck);
    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
                                               /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
    break;
  }
  case OMPRTL__kmpc_end_reduce: {
    // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
    break;
  }
  case OMPRTL__kmpc_end_reduce_nowait: {
    // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
    break;
  }
  case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
    // *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
    break;
  }
  case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
    // *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy,
                                      /*Name=*/"__kmpc_omp_task_complete_if0");
    break;
  }
  case OMPRTL__kmpc_ordered: {
    // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
    break;
  }
  case OMPRTL__kmpc_end_ordered: {
    // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
    break;
  }
  case OMPRTL__kmpc_omp_taskwait: {
    // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
    break;
  }
  case OMPRTL__kmpc_taskgroup: {
    // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
    break;
  }
  case OMPRTL__kmpc_end_taskgroup: {
    // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
    break;
  }
  case OMPRTL__kmpc_push_proc_bind: {
    // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
    // int proc_bind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
    break;
  }
  case OMPRTL__kmpc_omp_task_with_deps: {
    // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
    // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
        CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
    break;
  }
  case OMPRTL__kmpc_omp_wait_deps: {
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
    // kmp_depend_info_t *noalias_dep_list);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int32Ty, CGM.VoidPtrTy,
                                CGM.Int32Ty, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
    break;
  }
  case OMPRTL__kmpc_cancellationpoint: {
    // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
    // global_tid, kmp_int32 cncl_kind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
    break;
  }
  case OMPRTL__kmpc_cancel: {
    // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 cncl_kind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
    break;
  }
  case OMPRTL__kmpc_push_num_teams: {
    // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
    // kmp_int32 num_teams, kmp_int32 num_threads)
    // NOTE(review): the comment above says 'void' but the declared return
    // type below is kmp_int32 — confirm against the runtime's declaration.
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
    break;
  }
  case OMPRTL__kmpc_fork_teams: {
    // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
    // microtask, ...);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                getKmpc_MicroPointerTy()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
    break;
  }
  case OMPRTL__kmpc_taskloop: {
    // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
    // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
    // sched, kmp_uint64 grainsize, void *task_dup);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
                                CGM.IntTy,
                                CGM.VoidPtrTy,
                                CGM.IntTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty,
                                CGM.IntTy,
                                CGM.IntTy,
                                CGM.Int64Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
    break;
  }
  case OMPRTL__kmpc_doacross_init: {
    // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
    // num_dims, struct kmp_dim *dims);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
                                CGM.Int32Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
    break;
  }
  case OMPRTL__kmpc_doacross_fini: {
    // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
    break;
  }
  case OMPRTL__kmpc_doacross_post: {
    // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
    // *vec);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
    break;
  }
  case OMPRTL__kmpc_doacross_wait: {
    // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
    // *vec);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
    break;
  }
  case OMPRTL__kmpc_task_reduction_init: {
    // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
    // *data);
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
    break;
  }
  case OMPRTL__kmpc_task_reduction_get_th_data: {
    // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
    // *d);
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(
        FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
    break;
  }
  case OMPRTL__tgt_target: {
    // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
    // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
    break;
  }
  case OMPRTL__tgt_target_nowait: {
    // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
    // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
    // int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
    break;
  }
  case OMPRTL__tgt_target_teams: {
    // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
    // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
    // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int32Ty,
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
    break;
  }
  case OMPRTL__tgt_target_teams_nowait: {
    // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
    // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
    // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int32Ty,
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
    break;
  }
  case OMPRTL__tgt_register_lib: {
    // Build void __tgt_register_lib(__tgt_bin_desc *desc);
    // NOTE(review): the comment says 'void' but the declared return type
    // below is Int32Ty — confirm against libomptarget's declaration.
    QualType ParamTy =
        CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
    llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
    break;
  }
  case OMPRTL__tgt_unregister_lib: {
    // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
    // NOTE(review): the comment says 'void' but the declared return type
    // below is Int32Ty — confirm against libomptarget's declaration.
    QualType ParamTy =
        CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
    llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
    break;
  }
  case OMPRTL__tgt_target_data_begin: {
    // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
    // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
    break;
  }
  case OMPRTL__tgt_target_data_begin_nowait: {
    // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
    // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
    break;
  }
  case OMPRTL__tgt_target_data_end: {
    // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
    // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
    break;
  }
  case OMPRTL__tgt_target_data_end_nowait: {
    // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
    // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
    break;
  }
  case OMPRTL__tgt_target_data_update: {
    // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
    // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
    break;
  }
  case OMPRTL__tgt_target_data_update_nowait: {
    // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
    // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
    break;
  }
  }
  assert(RTLFn && "Unable to find OpenMP runtime function");
  return RTLFn;
}

// Declares the __kmpc_for_static_init_{4,4u,8,8u} variant matching the
// induction variable's size and signedness.
llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
                                                             bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                            : "__kmpc_for_static_init_4u")
                                : (IVSigned ? "__kmpc_for_static_init_8"
                                            : "__kmpc_for_static_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    CGM.Int32Ty,                               // schedtype
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy,                                     // p_stride
    ITy,                                       // incr
    ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

// Declares the __kmpc_dispatch_init_{4,4u,8,8u} variant matching the
// induction variable's size and signedness.
llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ?
CGM.Int32Ty : CGM.Int64Ty; 2379 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2380 CGM.Int32Ty, // tid 2381 CGM.Int32Ty, // schedtype 2382 ITy, // lower 2383 ITy, // upper 2384 ITy, // stride 2385 ITy // chunk 2386 }; 2387 auto *FnTy = 2388 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2389 return CGM.CreateRuntimeFunction(FnTy, Name); 2390 } 2391 2392 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, 2393 bool IVSigned) { 2394 assert((IVSize == 32 || IVSize == 64) && 2395 "IV size is not compatible with the omp runtime"); 2396 StringRef Name = 2397 IVSize == 32 2398 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2399 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2400 llvm::Type *TypeParams[] = { 2401 getIdentTyPointerTy(), // loc 2402 CGM.Int32Ty, // tid 2403 }; 2404 auto *FnTy = 2405 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2406 return CGM.CreateRuntimeFunction(FnTy, Name); 2407 } 2408 2409 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, 2410 bool IVSigned) { 2411 assert((IVSize == 32 || IVSize == 64) && 2412 "IV size is not compatible with the omp runtime"); 2413 StringRef Name = 2414 IVSize == 32 2415 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2416 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2417 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2418 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2419 llvm::Type *TypeParams[] = { 2420 getIdentTyPointerTy(), // loc 2421 CGM.Int32Ty, // tid 2422 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2423 PtrTy, // p_lower 2424 PtrTy, // p_upper 2425 PtrTy // p_stride 2426 }; 2427 auto *FnTy = 2428 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2429 return CGM.CreateRuntimeFunction(FnTy, Name); 2430 } 2431 2432 Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) { 2433 if (CGM.getLangOpts().OpenMPSimd) 2434 return Address::invalid(); 2435 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2436 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2437 if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) { 2438 SmallString<64> PtrName; 2439 { 2440 llvm::raw_svector_ostream OS(PtrName); 2441 OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr"; 2442 } 2443 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2444 if (!Ptr) { 2445 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2446 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2447 PtrName); 2448 if (!CGM.getLangOpts().OpenMPIsDevice) { 2449 auto *GV = cast<llvm::GlobalVariable>(Ptr); 2450 GV->setLinkage(llvm::GlobalValue::ExternalLinkage); 2451 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 2452 } 2453 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr)); 2454 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 2455 } 2456 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 2457 } 2458 return Address::invalid(); 2459 } 2460 2461 llvm::Constant * 2462 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2463 assert(!CGM.getLangOpts().OpenMPUseTLS || 2464 !CGM.getContext().getTargetInfo().isTLSSupported()); 2465 // Lookup the entry, lazily creating it if necessary. 
2466 std::string Suffix = getName({"cache", ""}); 2467 return getOrCreateInternalVariable( 2468 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 2469 } 2470 2471 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2472 const VarDecl *VD, 2473 Address VDAddr, 2474 SourceLocation Loc) { 2475 if (CGM.getLangOpts().OpenMPUseTLS && 2476 CGM.getContext().getTargetInfo().isTLSSupported()) 2477 return VDAddr; 2478 2479 llvm::Type *VarTy = VDAddr.getElementType(); 2480 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2481 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2482 CGM.Int8PtrTy), 2483 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2484 getOrCreateThreadPrivateCache(VD)}; 2485 return Address(CGF.EmitRuntimeCall( 2486 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2487 VDAddr.getAlignment()); 2488 } 2489 2490 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2491 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2492 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2493 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2494 // library. 2495 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 2496 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2497 OMPLoc); 2498 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2499 // to register constructor/destructor for variable. 
2500 llvm::Value *Args[] = { 2501 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 2502 Ctor, CopyCtor, Dtor}; 2503 CGF.EmitRuntimeCall( 2504 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 2505 } 2506 2507 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 2508 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 2509 bool PerformInit, CodeGenFunction *CGF) { 2510 if (CGM.getLangOpts().OpenMPUseTLS && 2511 CGM.getContext().getTargetInfo().isTLSSupported()) 2512 return nullptr; 2513 2514 VD = VD->getDefinition(CGM.getContext()); 2515 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 2516 QualType ASTTy = VD->getType(); 2517 2518 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 2519 const Expr *Init = VD->getAnyInitializer(); 2520 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2521 // Generate function that re-emits the declaration's initializer into the 2522 // threadprivate copy of the variable VD 2523 CodeGenFunction CtorCGF(CGM); 2524 FunctionArgList Args; 2525 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2526 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2527 ImplicitParamDecl::Other); 2528 Args.push_back(&Dst); 2529 2530 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2531 CGM.getContext().VoidPtrTy, Args); 2532 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2533 std::string Name = getName({"__kmpc_global_ctor_", ""}); 2534 llvm::Function *Fn = 2535 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2536 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 2537 Args, Loc, Loc); 2538 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 2539 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2540 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2541 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 2542 Arg = CtorCGF.Builder.CreateElementBitCast( 2543 Arg, 
CtorCGF.ConvertTypeForMem(ASTTy)); 2544 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 2545 /*IsInitializer=*/true); 2546 ArgVal = CtorCGF.EmitLoadOfScalar( 2547 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2548 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2549 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 2550 CtorCGF.FinishFunction(); 2551 Ctor = Fn; 2552 } 2553 if (VD->getType().isDestructedType() != QualType::DK_none) { 2554 // Generate function that emits destructor call for the threadprivate copy 2555 // of the variable VD 2556 CodeGenFunction DtorCGF(CGM); 2557 FunctionArgList Args; 2558 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2559 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2560 ImplicitParamDecl::Other); 2561 Args.push_back(&Dst); 2562 2563 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2564 CGM.getContext().VoidTy, Args); 2565 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2566 std::string Name = getName({"__kmpc_global_dtor_", ""}); 2567 llvm::Function *Fn = 2568 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2569 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2570 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 2571 Loc, Loc); 2572 // Create a scope with an artificial location for the body of this function. 2573 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2574 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 2575 DtorCGF.GetAddrOfLocalVar(&Dst), 2576 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 2577 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 2578 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2579 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2580 DtorCGF.FinishFunction(); 2581 Dtor = Fn; 2582 } 2583 // Do not emit init function if it is not required. 
2584 if (!Ctor && !Dtor) 2585 return nullptr; 2586 2587 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2588 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 2589 /*isVarArg=*/false) 2590 ->getPointerTo(); 2591 // Copying constructor for the threadprivate variable. 2592 // Must be NULL - reserved by runtime, but currently it requires that this 2593 // parameter is always NULL. Otherwise it fires assertion. 2594 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 2595 if (Ctor == nullptr) { 2596 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 2597 /*isVarArg=*/false) 2598 ->getPointerTo(); 2599 Ctor = llvm::Constant::getNullValue(CtorTy); 2600 } 2601 if (Dtor == nullptr) { 2602 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 2603 /*isVarArg=*/false) 2604 ->getPointerTo(); 2605 Dtor = llvm::Constant::getNullValue(DtorTy); 2606 } 2607 if (!CGF) { 2608 auto *InitFunctionTy = 2609 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 2610 std::string Name = getName({"__omp_threadprivate_init_", ""}); 2611 llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction( 2612 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 2613 CodeGenFunction InitCGF(CGM); 2614 FunctionArgList ArgList; 2615 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 2616 CGM.getTypes().arrangeNullaryFunction(), ArgList, 2617 Loc, Loc); 2618 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2619 InitCGF.FinishFunction(); 2620 return InitFunction; 2621 } 2622 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2623 } 2624 return nullptr; 2625 } 2626 2627 /// Obtain information that uniquely identifies a target entry. This 2628 /// consists of the file and device IDs as well as line number associated with 2629 /// the relevant entry source location. 
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  // Use the filesystem's unique (device, inode) pair so the same file seen
  // through different paths yields the same IDs. Failure to stat the file is
  // diagnosed but not fatal here.
  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

// Emits the offload ctor/dtor entries for a 'declare target to' variable
// definition and registers them with the offload-entries manager. Returns
// whether compilation targets the device (callers use this to decide if the
// regular host emission should be suppressed).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Nothing to emit for non-declare-target or 'link' variables.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link)
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  // Emit the ctor/dtor entries only once per variable definition.
  if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();

  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host only a placeholder byte is needed so the entry has a
      // unique address to key on.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder, mirroring the ctor case above.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

// Returns the address of a compiler-generated ("artificial") threadprivate
// value identified by Name, backed by a common global plus a cache global and
// resolved through __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*IsSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getPointerAlign());
}

// Emits 'if (Cond) ThenGen else ElseGen', constant-folding the condition
// away when possible so only the live arm is emitted.
void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                      const RegionCodeGenTy &ThenGen,
                                      const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

// Emits a '#pragma omp parallel' invocation: __kmpc_fork_call in the common
// case, or a serialized-parallel sequence when the 'if' clause evaluates to
// false at run time.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Value *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::Value *RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&gtid, &zero, CapturedStruct);
    Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                                        /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined
// function as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel
// region, but in regular serial code region, get thread ID by calling
// kmp_int32 kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
// temporary and return the address of that temp.
2887 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2888 SourceLocation Loc) { 2889 if (auto *OMPRegionInfo = 2890 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2891 if (OMPRegionInfo->getThreadIDVariable()) 2892 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 2893 2894 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2895 QualType Int32Ty = 2896 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2897 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2898 CGF.EmitStoreOfScalar(ThreadID, 2899 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2900 2901 return ThreadIDTemp; 2902 } 2903 2904 llvm::Constant * 2905 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, 2906 const llvm::Twine &Name) { 2907 SmallString<256> Buffer; 2908 llvm::raw_svector_ostream Out(Buffer); 2909 Out << Name; 2910 StringRef RuntimeName = Out.str(); 2911 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2912 if (Elem.second) { 2913 assert(Elem.second->getType()->getPointerElementType() == Ty && 2914 "OMP internal variable has different type than requested"); 2915 return &*Elem.second; 2916 } 2917 2918 return Elem.second = new llvm::GlobalVariable( 2919 CGM.getModule(), Ty, /*IsConstant*/ false, 2920 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2921 Elem.first()); 2922 } 2923 2924 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2925 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2926 std::string Name = getName({Prefix, "var"}); 2927 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2928 } 2929 2930 namespace { 2931 /// Common pre(post)-action for different OpenMP constructs. 
// Emits an Enter runtime call before the region and an Exit call after it.
// With Conditional=true the Enter call's result guards the region body
// ("if (__kmpc_xxx(...)) { body }"), and Done() closes the guard.
class CommonActionTy final : public PrePostActionTy {
  llvm::Value *EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::Value *ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
                 llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
                 bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // Must only be called after Enter() ran in conditional mode; finishes the
  // guarded region by branching to the continuation block.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

// Emits a '#pragma omp critical' region protected by the named lock, with an
// optional 'hint' clause argument forwarded to __kmpc_critical_with_hint.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
  }
  CommonActionTy Action(
      createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
                                 : OMPRTL__kmpc_critical),
      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

// Emits a '#pragma omp master' region: the body runs only when
// __kmpc_master returns non-zero.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

// Emits a '#pragma omp taskyield' runtime call; for untied tasks also emits
// the region's resume-switch bookkeeping.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

// Emits a '#pragma omp taskgroup' region bracketed by
// __kmpc_taskgroup/__kmpc_end_taskgroup.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr =
      CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

// Builds the internal "copy_func" helper used by __kmpc_copyprivate: it takes
// two void* arrays of variable addresses (destination and source) and emits
// one assignment per copyprivate variable.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

// Emits a '#pragma omp single' region. When copyprivate variables are
// present, a did_it flag records whether this thread executed the region and
// __kmpc_copyprivate broadcasts the values to the other threads.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(
          CopyprivateList, I, CGF.getPointerSize());
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all
    // other threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

// Emits a '#pragma omp ordered' region; only the 'threads' form needs the
// __kmpc_ordered/__kmpc_end_ordered bracket.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
createRuntimeFunction(OMPRTL__kmpc_end_ordered), 3207 Args); 3208 OrderedOpGen.setAction(Action); 3209 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3210 return; 3211 } 3212 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3213 } 3214 3215 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 3216 OpenMPDirectiveKind Kind, bool EmitChecks, 3217 bool ForceSimpleCall) { 3218 if (!CGF.HaveInsertPoint()) 3219 return; 3220 // Build call __kmpc_cancel_barrier(loc, thread_id); 3221 // Build call __kmpc_barrier(loc, thread_id); 3222 unsigned Flags; 3223 if (Kind == OMPD_for) 3224 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 3225 else if (Kind == OMPD_sections) 3226 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 3227 else if (Kind == OMPD_single) 3228 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 3229 else if (Kind == OMPD_barrier) 3230 Flags = OMP_IDENT_BARRIER_EXPL; 3231 else 3232 Flags = OMP_IDENT_BARRIER_IMPL; 3233 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 3234 // thread_id); 3235 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 3236 getThreadID(CGF, Loc)}; 3237 if (auto *OMPRegionInfo = 3238 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 3239 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 3240 llvm::Value *Result = CGF.EmitRuntimeCall( 3241 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 3242 if (EmitChecks) { 3243 // if (__kmpc_cancel_barrier()) { 3244 // exit from construct; 3245 // } 3246 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 3247 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 3248 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 3249 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 3250 CGF.EmitBlock(ExitBB); 3251 // exit from construct; 3252 CodeGenFunction::JumpDest CancelDestination = 3253 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 3254 CGF.EmitBranchThroughCleanup(CancelDestination); 3255 
CGF.EmitBlock(ContBB, /*IsFinished=*/true); 3256 } 3257 return; 3258 } 3259 } 3260 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 3261 } 3262 3263 /// Map the OpenMP loop schedule to the runtime enumeration. 3264 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 3265 bool Chunked, bool Ordered) { 3266 switch (ScheduleKind) { 3267 case OMPC_SCHEDULE_static: 3268 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 3269 : (Ordered ? OMP_ord_static : OMP_sch_static); 3270 case OMPC_SCHEDULE_dynamic: 3271 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 3272 case OMPC_SCHEDULE_guided: 3273 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 3274 case OMPC_SCHEDULE_runtime: 3275 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 3276 case OMPC_SCHEDULE_auto: 3277 return Ordered ? OMP_ord_auto : OMP_sch_auto; 3278 case OMPC_SCHEDULE_unknown: 3279 assert(!Chunked && "chunk was specified but schedule kind not known"); 3280 return Ordered ? OMP_ord_static : OMP_sch_static; 3281 } 3282 llvm_unreachable("Unexpected runtime schedule"); 3283 } 3284 3285 /// Map the OpenMP distribute schedule to the runtime enumeration. 3286 static OpenMPSchedType 3287 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 3288 // only static is allowed for dist_schedule 3289 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 3290 } 3291 3292 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 3293 bool Chunked) const { 3294 OpenMPSchedType Schedule = 3295 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3296 return Schedule == OMP_sch_static; 3297 } 3298 3299 bool CGOpenMPRuntime::isStaticNonchunked( 3300 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3301 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3302 return Schedule == OMP_dist_sch_static; 3303 } 3304 3305 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 3306 bool Chunked) const { 3307 OpenMPSchedType Schedule = 3308 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3309 return Schedule == OMP_sch_static_chunked; 3310 } 3311 3312 bool CGOpenMPRuntime::isStaticChunked( 3313 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3314 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3315 return Schedule == OMP_dist_sch_static_chunked; 3316 } 3317 3318 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 3319 OpenMPSchedType Schedule = 3320 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 3321 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 3322 return Schedule != OMP_sch_static; 3323 } 3324 3325 static int addMonoNonMonoModifier(OpenMPSchedType Schedule, 3326 OpenMPScheduleClauseModifier M1, 3327 OpenMPScheduleClauseModifier M2) { 3328 int Modifier = 0; 3329 switch (M1) { 3330 case OMPC_SCHEDULE_MODIFIER_monotonic: 3331 Modifier = OMP_sch_modifier_monotonic; 3332 break; 3333 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3334 Modifier = OMP_sch_modifier_nonmonotonic; 3335 break; 3336 case OMPC_SCHEDULE_MODIFIER_simd: 3337 if (Schedule == OMP_sch_static_chunked) 3338 Schedule = OMP_sch_static_balanced_chunked; 3339 break; 3340 case OMPC_SCHEDULE_MODIFIER_last: 3341 
case OMPC_SCHEDULE_MODIFIER_unknown: 3342 break; 3343 } 3344 switch (M2) { 3345 case OMPC_SCHEDULE_MODIFIER_monotonic: 3346 Modifier = OMP_sch_modifier_monotonic; 3347 break; 3348 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3349 Modifier = OMP_sch_modifier_nonmonotonic; 3350 break; 3351 case OMPC_SCHEDULE_MODIFIER_simd: 3352 if (Schedule == OMP_sch_static_chunked) 3353 Schedule = OMP_sch_static_balanced_chunked; 3354 break; 3355 case OMPC_SCHEDULE_MODIFIER_last: 3356 case OMPC_SCHEDULE_MODIFIER_unknown: 3357 break; 3358 } 3359 return Schedule | Modifier; 3360 } 3361 3362 void CGOpenMPRuntime::emitForDispatchInit( 3363 CodeGenFunction &CGF, SourceLocation Loc, 3364 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 3365 bool Ordered, const DispatchRTInput &DispatchValues) { 3366 if (!CGF.HaveInsertPoint()) 3367 return; 3368 OpenMPSchedType Schedule = getRuntimeSchedule( 3369 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 3370 assert(Ordered || 3371 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 3372 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 3373 Schedule != OMP_sch_static_balanced_chunked)); 3374 // Call __kmpc_dispatch_init( 3375 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 3376 // kmp_int[32|64] lower, kmp_int[32|64] upper, 3377 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 3378 3379 // If the Chunk was not specified in the clause - use default value 1. 3380 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 3381 : CGF.Builder.getIntN(IVSize, 1); 3382 llvm::Value *Args[] = { 3383 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3384 CGF.Builder.getInt32(addMonoNonMonoModifier( 3385 Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 3386 DispatchValues.LB, // Lower 3387 DispatchValues.UB, // Upper 3388 CGF.Builder.getIntN(IVSize, 1), // Stride 3389 Chunk // Chunk 3390 }; 3391 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 3392 } 3393 3394 static void emitForStaticInitCall( 3395 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 3396 llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule, 3397 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 3398 const CGOpenMPRuntime::StaticRTInput &Values) { 3399 if (!CGF.HaveInsertPoint()) 3400 return; 3401 3402 assert(!Values.Ordered); 3403 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 3404 Schedule == OMP_sch_static_balanced_chunked || 3405 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 3406 Schedule == OMP_dist_sch_static || 3407 Schedule == OMP_dist_sch_static_chunked); 3408 3409 // Call __kmpc_for_static_init( 3410 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 3411 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 3412 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 3413 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 3414 llvm::Value *Chunk = Values.Chunk; 3415 if (Chunk == nullptr) { 3416 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 3417 Schedule == OMP_dist_sch_static) && 3418 "expected static non-chunked schedule"); 3419 // If the Chunk was not specified in the clause - use default value 1. 
3420 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 3421 } else { 3422 assert((Schedule == OMP_sch_static_chunked || 3423 Schedule == OMP_sch_static_balanced_chunked || 3424 Schedule == OMP_ord_static_chunked || 3425 Schedule == OMP_dist_sch_static_chunked) && 3426 "expected static chunked schedule"); 3427 } 3428 llvm::Value *Args[] = { 3429 UpdateLocation, 3430 ThreadId, 3431 CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1, 3432 M2)), // Schedule type 3433 Values.IL.getPointer(), // &isLastIter 3434 Values.LB.getPointer(), // &LB 3435 Values.UB.getPointer(), // &UB 3436 Values.ST.getPointer(), // &Stride 3437 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 3438 Chunk // Chunk 3439 }; 3440 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 3441 } 3442 3443 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 3444 SourceLocation Loc, 3445 OpenMPDirectiveKind DKind, 3446 const OpenMPScheduleTy &ScheduleKind, 3447 const StaticRTInput &Values) { 3448 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 3449 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 3450 assert(isOpenMPWorksharingDirective(DKind) && 3451 "Expected loop-based or sections-based directive."); 3452 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 3453 isOpenMPLoopDirective(DKind) 3454 ? 
OMP_IDENT_WORK_LOOP 3455 : OMP_IDENT_WORK_SECTIONS); 3456 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3457 llvm::Constant *StaticInitFunction = 3458 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3459 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3460 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 3461 } 3462 3463 void CGOpenMPRuntime::emitDistributeStaticInit( 3464 CodeGenFunction &CGF, SourceLocation Loc, 3465 OpenMPDistScheduleClauseKind SchedKind, 3466 const CGOpenMPRuntime::StaticRTInput &Values) { 3467 OpenMPSchedType ScheduleNum = 3468 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 3469 llvm::Value *UpdatedLocation = 3470 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 3471 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3472 llvm::Constant *StaticInitFunction = 3473 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3474 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3475 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 3476 OMPC_SCHEDULE_MODIFIER_unknown, Values); 3477 } 3478 3479 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 3480 SourceLocation Loc, 3481 OpenMPDirectiveKind DKind) { 3482 if (!CGF.HaveInsertPoint()) 3483 return; 3484 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 3485 llvm::Value *Args[] = { 3486 emitUpdateLocation(CGF, Loc, 3487 isOpenMPDistributeDirective(DKind) 3488 ? OMP_IDENT_WORK_DISTRIBUTE 3489 : isOpenMPLoopDirective(DKind) 3490 ? 
OMP_IDENT_WORK_LOOP 3491 : OMP_IDENT_WORK_SECTIONS), 3492 getThreadID(CGF, Loc)}; 3493 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 3494 Args); 3495 } 3496 3497 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 3498 SourceLocation Loc, 3499 unsigned IVSize, 3500 bool IVSigned) { 3501 if (!CGF.HaveInsertPoint()) 3502 return; 3503 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 3504 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3505 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 3506 } 3507 3508 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 3509 SourceLocation Loc, unsigned IVSize, 3510 bool IVSigned, Address IL, 3511 Address LB, Address UB, 3512 Address ST) { 3513 // Call __kmpc_dispatch_next( 3514 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 3515 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 3516 // kmp_int[32|64] *p_stride); 3517 llvm::Value *Args[] = { 3518 emitUpdateLocation(CGF, Loc), 3519 getThreadID(CGF, Loc), 3520 IL.getPointer(), // &isLastIter 3521 LB.getPointer(), // &Lower 3522 UB.getPointer(), // &Upper 3523 ST.getPointer() // &Stride 3524 }; 3525 llvm::Value *Call = 3526 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 3527 return CGF.EmitScalarConversion( 3528 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 3529 CGF.getContext().BoolTy, Loc); 3530 } 3531 3532 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 3533 llvm::Value *NumThreads, 3534 SourceLocation Loc) { 3535 if (!CGF.HaveInsertPoint()) 3536 return; 3537 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 3538 llvm::Value *Args[] = { 3539 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3540 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 3541 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 3542 Args); 3543 
} 3544 3545 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 3546 OpenMPProcBindClauseKind ProcBind, 3547 SourceLocation Loc) { 3548 if (!CGF.HaveInsertPoint()) 3549 return; 3550 // Constants for proc bind value accepted by the runtime. 3551 enum ProcBindTy { 3552 ProcBindFalse = 0, 3553 ProcBindTrue, 3554 ProcBindMaster, 3555 ProcBindClose, 3556 ProcBindSpread, 3557 ProcBindIntel, 3558 ProcBindDefault 3559 } RuntimeProcBind; 3560 switch (ProcBind) { 3561 case OMPC_PROC_BIND_master: 3562 RuntimeProcBind = ProcBindMaster; 3563 break; 3564 case OMPC_PROC_BIND_close: 3565 RuntimeProcBind = ProcBindClose; 3566 break; 3567 case OMPC_PROC_BIND_spread: 3568 RuntimeProcBind = ProcBindSpread; 3569 break; 3570 case OMPC_PROC_BIND_unknown: 3571 llvm_unreachable("Unsupported proc_bind value."); 3572 } 3573 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 3574 llvm::Value *Args[] = { 3575 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3576 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 3577 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 3578 } 3579 3580 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 3581 SourceLocation Loc) { 3582 if (!CGF.HaveInsertPoint()) 3583 return; 3584 // Build call void __kmpc_flush(ident_t *loc) 3585 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 3586 emitUpdateLocation(CGF, Loc)); 3587 } 3588 3589 namespace { 3590 /// Indexes of fields for type kmp_task_t. 3591 enum KmpTaskTFields { 3592 /// List of shared variables. 3593 KmpTaskTShareds, 3594 /// Task routine. 3595 KmpTaskTRoutine, 3596 /// Partition id for the untied tasks. 3597 KmpTaskTPartId, 3598 /// Function with call of destructors for private variables. 3599 Data1, 3600 /// Task priority. 3601 Data2, 3602 /// (Taskloops only) Lower bound. 3603 KmpTaskTLowerBound, 3604 /// (Taskloops only) Upper bound. 
3605 KmpTaskTUpperBound, 3606 /// (Taskloops only) Stride. 3607 KmpTaskTStride, 3608 /// (Taskloops only) Is last iteration flag. 3609 KmpTaskTLastIter, 3610 /// (Taskloops only) Reduction data. 3611 KmpTaskTReductions, 3612 }; 3613 } // anonymous namespace 3614 3615 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 3616 return OffloadEntriesTargetRegion.empty() && 3617 OffloadEntriesDeviceGlobalVar.empty(); 3618 } 3619 3620 /// Initialize target region entry. 3621 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3622 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3623 StringRef ParentName, unsigned LineNum, 3624 unsigned Order) { 3625 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3626 "only required for the device " 3627 "code generation."); 3628 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3629 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3630 OMPTargetRegionEntryTargetRegion); 3631 ++OffloadingEntriesNum; 3632 } 3633 3634 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3635 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3636 StringRef ParentName, unsigned LineNum, 3637 llvm::Constant *Addr, llvm::Constant *ID, 3638 OMPTargetRegionEntryKind Flags) { 3639 // If we are emitting code for a target, the entry is already initialized, 3640 // only has to be registered. 
3641 if (CGM.getLangOpts().OpenMPIsDevice) { 3642 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { 3643 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3644 DiagnosticsEngine::Error, 3645 "Unable to find target region on line '%0' in the device code."); 3646 CGM.getDiags().Report(DiagID) << LineNum; 3647 return; 3648 } 3649 auto &Entry = 3650 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3651 assert(Entry.isValid() && "Entry not initialized!"); 3652 Entry.setAddress(Addr); 3653 Entry.setID(ID); 3654 Entry.setFlags(Flags); 3655 } else { 3656 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3657 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3658 ++OffloadingEntriesNum; 3659 } 3660 } 3661 3662 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3663 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3664 unsigned LineNum) const { 3665 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3666 if (PerDevice == OffloadEntriesTargetRegion.end()) 3667 return false; 3668 auto PerFile = PerDevice->second.find(FileID); 3669 if (PerFile == PerDevice->second.end()) 3670 return false; 3671 auto PerParentName = PerFile->second.find(ParentName); 3672 if (PerParentName == PerFile->second.end()) 3673 return false; 3674 auto PerLine = PerParentName->second.find(LineNum); 3675 if (PerLine == PerParentName->second.end()) 3676 return false; 3677 // Fail if this entry is already registered. 3678 if (PerLine->second.getAddress() || PerLine->second.getID()) 3679 return false; 3680 return true; 3681 } 3682 3683 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3684 const OffloadTargetRegionEntryInfoActTy &Action) { 3685 // Scan all target region entries and perform the provided action. 
3686 for (const auto &D : OffloadEntriesTargetRegion) 3687 for (const auto &F : D.second) 3688 for (const auto &P : F.second) 3689 for (const auto &L : P.second) 3690 Action(D.first, F.first, P.first(), L.first, L.second); 3691 } 3692 3693 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3694 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3695 OMPTargetGlobalVarEntryKind Flags, 3696 unsigned Order) { 3697 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3698 "only required for the device " 3699 "code generation."); 3700 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3701 ++OffloadingEntriesNum; 3702 } 3703 3704 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3705 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3706 CharUnits VarSize, 3707 OMPTargetGlobalVarEntryKind Flags, 3708 llvm::GlobalValue::LinkageTypes Linkage) { 3709 if (CGM.getLangOpts().OpenMPIsDevice) { 3710 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3711 assert(Entry.isValid() && Entry.getFlags() == Flags && 3712 "Entry not initialized!"); 3713 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3714 "Resetting with the new address."); 3715 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) 3716 return; 3717 Entry.setAddress(Addr); 3718 Entry.setVarSize(VarSize); 3719 Entry.setLinkage(Linkage); 3720 } else { 3721 if (hasDeviceGlobalVarEntryInfo(VarName)) 3722 return; 3723 OffloadEntriesDeviceGlobalVar.try_emplace( 3724 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3725 ++OffloadingEntriesNum; 3726 } 3727 } 3728 3729 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3730 actOnDeviceGlobalVarEntriesInfo( 3731 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3732 // Scan all target region entries and perform the provided action. 
3733 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3734 Action(E.getKey(), E.getValue()); 3735 } 3736 3737 llvm::Function * 3738 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { 3739 // If we don't have entries or if we are emitting code for the device, we 3740 // don't need to do anything. 3741 if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) 3742 return nullptr; 3743 3744 llvm::Module &M = CGM.getModule(); 3745 ASTContext &C = CGM.getContext(); 3746 3747 // Get list of devices we care about 3748 const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples; 3749 3750 // We should be creating an offloading descriptor only if there are devices 3751 // specified. 3752 assert(!Devices.empty() && "No OpenMP offloading devices??"); 3753 3754 // Create the external variables that will point to the begin and end of the 3755 // host entries section. These will be defined by the linker. 3756 llvm::Type *OffloadEntryTy = 3757 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); 3758 std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"}); 3759 auto *HostEntriesBegin = new llvm::GlobalVariable( 3760 M, OffloadEntryTy, /*isConstant=*/true, 3761 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 3762 EntriesBeginName); 3763 std::string EntriesEndName = getName({"omp_offloading", "entries_end"}); 3764 auto *HostEntriesEnd = 3765 new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true, 3766 llvm::GlobalValue::ExternalLinkage, 3767 /*Initializer=*/nullptr, EntriesEndName); 3768 3769 // Create all device images 3770 auto *DeviceImageTy = cast<llvm::StructType>( 3771 CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); 3772 ConstantInitBuilder DeviceImagesBuilder(CGM); 3773 ConstantArrayBuilder DeviceImagesEntries = 3774 DeviceImagesBuilder.beginArray(DeviceImageTy); 3775 3776 for (const llvm::Triple &Device : Devices) { 3777 StringRef T = Device.getTriple(); 3778 
std::string BeginName = getName({"omp_offloading", "img_start", ""}); 3779 auto *ImgBegin = new llvm::GlobalVariable( 3780 M, CGM.Int8Ty, /*isConstant=*/true, 3781 llvm::GlobalValue::ExternalWeakLinkage, 3782 /*Initializer=*/nullptr, Twine(BeginName).concat(T)); 3783 std::string EndName = getName({"omp_offloading", "img_end", ""}); 3784 auto *ImgEnd = new llvm::GlobalVariable( 3785 M, CGM.Int8Ty, /*isConstant=*/true, 3786 llvm::GlobalValue::ExternalWeakLinkage, 3787 /*Initializer=*/nullptr, Twine(EndName).concat(T)); 3788 3789 llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin, 3790 HostEntriesEnd}; 3791 createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data, 3792 DeviceImagesEntries); 3793 } 3794 3795 // Create device images global array. 3796 std::string ImagesName = getName({"omp_offloading", "device_images"}); 3797 llvm::GlobalVariable *DeviceImages = 3798 DeviceImagesEntries.finishAndCreateGlobal(ImagesName, 3799 CGM.getPointerAlign(), 3800 /*isConstant=*/true); 3801 DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3802 3803 // This is a Zero array to be used in the creation of the constant expressions 3804 llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), 3805 llvm::Constant::getNullValue(CGM.Int32Ty)}; 3806 3807 // Create the target region descriptor. 3808 llvm::Constant *Data[] = { 3809 llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()), 3810 llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), 3811 DeviceImages, Index), 3812 HostEntriesBegin, HostEntriesEnd}; 3813 std::string Descriptor = getName({"omp_offloading", "descriptor"}); 3814 llvm::GlobalVariable *Desc = createGlobalStruct( 3815 CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor); 3816 3817 // Emit code to register or unregister the descriptor at execution 3818 // startup or closing, respectively. 
3819 3820 llvm::Function *UnRegFn; 3821 { 3822 FunctionArgList Args; 3823 ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); 3824 Args.push_back(&DummyPtr); 3825 3826 CodeGenFunction CGF(CGM); 3827 // Disable debug info for global (de-)initializer because they are not part 3828 // of some particular construct. 3829 CGF.disableDebugInfo(); 3830 const auto &FI = 3831 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3832 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 3833 std::string UnregName = getName({"omp_offloading", "descriptor_unreg"}); 3834 UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI); 3835 CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args); 3836 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), 3837 Desc); 3838 CGF.FinishFunction(); 3839 } 3840 llvm::Function *RegFn; 3841 { 3842 CodeGenFunction CGF(CGM); 3843 // Disable debug info for global (de-)initializer because they are not part 3844 // of some particular construct. 3845 CGF.disableDebugInfo(); 3846 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 3847 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 3848 3849 // Encode offload target triples into the registration function name. It 3850 // will serve as a comdat key for the registration/unregistration code for 3851 // this particular combination of offloading targets. 
3852 SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U); 3853 RegFnNameParts[0] = "omp_offloading"; 3854 RegFnNameParts[1] = "descriptor_reg"; 3855 llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2), 3856 [](const llvm::Triple &T) -> const std::string& { 3857 return T.getTriple(); 3858 }); 3859 llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end()); 3860 std::string Descriptor = getName(RegFnNameParts); 3861 RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI); 3862 CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList()); 3863 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); 3864 // Create a variable to drive the registration and unregistration of the 3865 // descriptor, so we can reuse the logic that emits Ctors and Dtors. 3866 ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), 3867 SourceLocation(), nullptr, C.CharTy, 3868 ImplicitParamDecl::Other); 3869 CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); 3870 CGF.FinishFunction(); 3871 } 3872 if (CGM.supportsCOMDAT()) { 3873 // It is sufficient to call registration function only once, so create a 3874 // COMDAT group for registration/unregistration functions and associated 3875 // data. That would reduce startup time and code size. Registration 3876 // function serves as a COMDAT group key. 
3877 llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName()); 3878 RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); 3879 RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); 3880 RegFn->setComdat(ComdatKey); 3881 UnRegFn->setComdat(ComdatKey); 3882 DeviceImages->setComdat(ComdatKey); 3883 Desc->setComdat(ComdatKey); 3884 } 3885 return RegFn; 3886 } 3887 3888 void CGOpenMPRuntime::createOffloadEntry( 3889 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3890 llvm::GlobalValue::LinkageTypes Linkage) { 3891 StringRef Name = Addr->getName(); 3892 llvm::Module &M = CGM.getModule(); 3893 llvm::LLVMContext &C = M.getContext(); 3894 3895 // Create constant string with the name. 3896 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3897 3898 std::string StringName = getName({"omp_offloading", "entry_name"}); 3899 auto *Str = new llvm::GlobalVariable( 3900 M, StrPtrInit->getType(), /*isConstant=*/true, 3901 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3902 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3903 3904 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 3905 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 3906 llvm::ConstantInt::get(CGM.SizeTy, Size), 3907 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3908 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3909 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3910 llvm::GlobalVariable *Entry = createGlobalStruct( 3911 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 3912 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 3913 3914 // The entry has to be created in the section the linker expects it to be. 
3915 std::string Section = getName({"omp_offloading", "entries"}); 3916 Entry->setSection(Section); 3917 } 3918 3919 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3920 // Emit the offloading entries and metadata so that the device codegen side 3921 // can easily figure out what to emit. The produced metadata looks like 3922 // this: 3923 // 3924 // !omp_offload.info = !{!1, ...} 3925 // 3926 // Right now we only generate metadata for function that contain target 3927 // regions. 3928 3929 // If we do not have entries, we don't need to do anything. 3930 if (OffloadEntriesInfoManager.empty()) 3931 return; 3932 3933 llvm::Module &M = CGM.getModule(); 3934 llvm::LLVMContext &C = M.getContext(); 3935 SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> 3936 OrderedEntries(OffloadEntriesInfoManager.size()); 3937 llvm::SmallVector<StringRef, 16> ParentFunctions( 3938 OffloadEntriesInfoManager.size()); 3939 3940 // Auxiliary methods to create metadata values and strings. 3941 auto &&GetMDInt = [this](unsigned V) { 3942 return llvm::ConstantAsMetadata::get( 3943 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3944 }; 3945 3946 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3947 3948 // Create the offloading info metadata node. 3949 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3950 3951 // Create function that emits metadata for each target region entry; 3952 auto &&TargetRegionMetadataEmitter = 3953 [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString]( 3954 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3955 unsigned Line, 3956 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3957 // Generate metadata for target regions. Each entry of this metadata 3958 // contains: 3959 // - Entry 0 -> Kind of this type of metadata (0). 3960 // - Entry 1 -> Device ID of the file where the entry was identified. 
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Walk the entries in creation order and emit the actual
  // __tgt_offload_entry descriptors, diagnosing inconsistent ones.
  for (const auto *E : OrderedEntries) {
    assert(E && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                E)) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(DiagID);
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE =
                   dyn_cast<OffloadEntriesInfoManagerTy::
                                OffloadEntryInfoDeviceGlobalVar>(E)) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // 'declare target link' entries have an address only on the host side.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For a global variable the entry ID is the variable address itself.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to decode the operands of one metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; remaining operands follow the layout
    // produced by createOffloadEntriesAndInfoMetadata().
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

/// Lazily build the kmp_routine_entry_t function pointer type used for task
/// entry points. No-op after the first call.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void      *addr;       // Pointer to the offload entry info.
  //                          // (function or global)
  //   char      *name;       // Name of the function or global.
  //   size_t     size;       // Size of the entry info (0 if it a function).
  //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
  //   int32_t    reserved;   // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    // The record is packed so its layout matches the runtime's expectations
    // exactly, with no inter-field padding.
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
  // These are the types we need to build:
  // struct __tgt_device_image{
  //   void *ImageStart;                  // Pointer to the target code start.
  //   void *ImageEnd;                    // Pointer to the target code end.
  //   // We also add the host entries to the device image, as it may be useful
  //   // for the target runtime to have access to that information.
  //   __tgt_offload_entry *EntriesBegin; // Begin of the table with all
  //                                      // the entries.
  //   __tgt_offload_entry *EntriesEnd;   // End of the table with all the
  //                                      // entries (non inclusive).
  // };
  if (TgtDeviceImageQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtDeviceImageQTy = C.getRecordType(RD);
  }
  return TgtDeviceImageQTy;
}

QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
  // struct __tgt_bin_desc{
  //   int32_t NumDevices;                // Number of devices supported.
  //   __tgt_device_image *DeviceImages;  // Arrays of device images
  //                                      // (one per device).
  //   __tgt_offload_entry *EntriesBegin; // Begin of the table with all the
  //                                      // entries.
  //   __tgt_offload_entry *EntriesEnd;   // End of the table with all the
  //                                      // entries (non inclusive).
  // };
  if (TgtBinaryDescriptorQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
    RD->startDefinition();
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtBinaryDescriptorQTy = C.getRecordType(RD);
  }
  return TgtBinaryDescriptorQTy;
}

namespace {
/// Groups the declarations involved in privatizing one variable of a task:
/// the original variable, its private copy, and (for firstprivates) the
/// variable used to emit the element initializer.
struct PrivateHelpersTy {
  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
                   const VarDecl *PrivateElemInit)
      : Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  const VarDecl *Original;
  const VarDecl *PrivateCopy;
  const VarDecl *PrivateElemInit;
};
// A private paired with its alignment, so privates can be ordered by
// alignment before being laid out in the privates record.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

/// Build the implicit record type holding the private copies of a task.
/// Returns nullptr when there are no privates.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    // };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      // Propagate explicit alignment attributes from the original variable
      // to the corresponding field.
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
                                                 E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

/// Build the kmp_task_t record (plus the kmp_cmplrdata_t union it embeds),
/// extended with the loop-bound fields for taskloop directives.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t     data1;
  //         kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

/// Build the record combining the runtime task descriptor with the record of
/// the task's private copies (if any).
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Value *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Value *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Field 0 of kmp_task_t_with_privates is the kmp_task_t task descriptor.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer();

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // Field 1 is the privates record; pass null when the task has no privates.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloop entries additionally receive lb/ub/st/liter/reductions.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emit the task destructor function: runs the destructor of every private
/// copy in the task's privates record that needs one.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Field 1 of kmp_task_t_with_privates is the privates record; push a
  // destroy cleanup for every field whose type requires destruction.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               ArrayRef<const Expr *> PrivateVars,
                               ArrayRef<const Expr *> FirstprivateVars,
                               ArrayRef<const Expr *> LastprivateVars,
                               QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  // Map each privatized declaration to the position of its out-parameter in
  // Args (position 0 is the privates-record pointer itself).
  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  // The mapping function is trivial; make sure it is always inlined.
  TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
  TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
  TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Comparator used to order privates by decreasing alignment.
static bool stable_sort_comparator(const PrivateDataTy P1,
                                   const PrivateDataTy P2) {
  return P1.first > P2.first;
}

/// Emit initialization for private variables in
/// task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Field 1 of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // For task_dup (ForDup) only non-trivial construction is re-emitted.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = OriginalVD->getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: emit the initializer with the source
          // element privatized to the shared variable's address.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private/lastprivate: default-initialize the copy.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Only a non-trivial constructor call forces emission of the dup
    // function; trivial initialization is handled by memcpy.
    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}


/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Load the shareds pointer from the *source* task descriptor.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
/// \param KmpTaskTWithPrivatesQTyRD Record for the task type whose second
/// field is the record of private copies (see the std::next(..., 1) below).
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
  bool NeedsCleanup = false;
  // Field 1 of kmp_task_t_with_privates is the privates record; scan its
  // members for any type that has a non-trivial destructor.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
  for (const FieldDecl *FD : PrivateRD->fields()) {
    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
    if (NeedsCleanup)
      break;
  }
  return NeedsCleanup;
}

// Allocates and initializes a kmp_task_t object for the given directive:
// collects the private copies, builds the task record with a privates
// appendix, emits the proxy entry/private-mapping/dup helper functions, calls
// __kmpc_omp_task_alloc and fills in shareds, privates, destructor pointer
// and priority. The caller finishes the task via emitTaskCall or
// emitTaskLoopCall using the returned TaskResultTy.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Value *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    // Firstprivates additionally carry the element-init expression used to
    // copy-construct the private copy from the original.
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Stable sort keeps source order among equally-aligned privates (ordering
  // criterion lives in stable_sort_comparator).
  std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator);
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use an
  // extended record (cached separately in SavedKmpTaskloopTQTy).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the 4th argument of the outlined task
  // function; its LLVM type is reused for the map function pointer below.
  llvm::Type *TaskPrivatesMapTy =
      std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Value *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // final(...) may be a run-time expression (pointer set) or a compile-time
  // constant (int flag); select the FinalFlag bit accordingly.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops need a task_dup helper when there are lastprivates or any
    // private copy requires construction (checkInitIsRequired).
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

// Emits code for the 'task' directive: allocates the task via emitTaskInit,
// materializes the dependence array (if any) and then either enqueues the
// task (__kmpc_omp_task[_with_deps]) or, under a false 'if' clause, executes
// it immediately between the begin_if0/complete_if0 runtime calls.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Value *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Value *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  ASTContext &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Lazily build the kmp_depend_info record type:
    // { intptr_t base_addr; size_t len; flags };
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned I = 0; I < NumDependencies; ++I) {
      const Expr *E = Data.Dependences[I].second;
      LValue Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (const auto *ASE =
              dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // For an array section the length is (one past the upper bound) minus
        // the lower bound, computed in bytes via ptrtoint.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = CGF.getTypeSize(Ty);
      }
      LValue Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, I, DependencySize),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[I].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      LValue FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
        CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'then' branch: the 'if' clause is true (or absent) - enqueue the task.
  // NOTE(review): TaskArgs/DepTaskArgs are captured by reference; the lambda
  // is invoked before this frame unwinds (directly below or via
  // emitOMPIfClause), so the captures stay valid.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
                        &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'else' branch: the 'if' clause is false - wait for dependences and run
  // the task body immediately ("undeferred" task).
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

// Emits code for the 'taskloop' directive family: allocates the task via
// emitTaskInit, seeds the lower/upper bound, stride and reductions fields of
// the kmp_task_t, and issues the __kmpc_taskloop runtime call.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Value *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lb/ub/st fields of the task record from the loop
  // directive's precomputed bound variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // Schedule modes for __kmpc_taskloop's 'sched' parameter.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
5303 static void EmitOMPAggregateReduction( 5304 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5305 const VarDecl *RHSVar, 5306 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5307 const Expr *, const Expr *)> &RedOpGen, 5308 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5309 const Expr *UpExpr = nullptr) { 5310 // Perform element-by-element initialization. 5311 QualType ElementTy; 5312 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5313 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5314 5315 // Drill down to the base element type on both arrays. 5316 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5317 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5318 5319 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5320 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5321 // Cast from pointer to array type to pointer to single element. 5322 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5323 // The basic structure here is a while-do loop. 5324 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5325 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5326 llvm::Value *IsEmpty = 5327 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5328 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5329 5330 // Enter the loop body, making that address the current address. 
5331 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5332 CGF.EmitBlock(BodyBB); 5333 5334 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5335 5336 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5337 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5338 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5339 Address RHSElementCurrent = 5340 Address(RHSElementPHI, 5341 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5342 5343 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5344 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5345 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5346 Address LHSElementCurrent = 5347 Address(LHSElementPHI, 5348 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5349 5350 // Emit copy. 5351 CodeGenFunction::OMPPrivateScope Scope(CGF); 5352 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5353 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5354 Scope.Privatize(); 5355 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5356 Scope.ForceCleanup(); 5357 5358 // Shift the address forward by one element. 5359 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5360 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5361 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5362 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5363 // Check whether we've reached the end. 5364 llvm::Value *Done = 5365 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5366 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5367 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5368 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5369 5370 // Done. 5371 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5372 } 5373 5374 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5375 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5376 /// UDR combiner function. 5377 static void emitReductionCombiner(CodeGenFunction &CGF, 5378 const Expr *ReductionOp) { 5379 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5380 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5381 if (const auto *DRE = 5382 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5383 if (const auto *DRD = 5384 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5385 std::pair<llvm::Function *, llvm::Function *> Reduction = 5386 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5387 RValue Func = RValue::get(Reduction.first); 5388 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5389 CGF.EmitIgnoredExpr(ReductionOp); 5390 return; 5391 } 5392 CGF.EmitIgnoredExpr(ReductionOp); 5393 } 5394 5395 llvm::Value *CGOpenMPRuntime::emitReductionFunction( 5396 CodeGenModule &CGM, SourceLocation Loc, llvm::Type *ArgsType, 5397 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, 5398 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) { 5399 ASTContext &C = CGM.getContext(); 5400 5401 // void reduction_func(void *LHSArg, void *RHSArg); 5402 FunctionArgList Args; 5403 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5404 ImplicitParamDecl::Other); 5405 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5406 ImplicitParamDecl::Other); 5407 Args.push_back(&LHSArg); 5408 Args.push_back(&RHSArg); 5409 const auto &CGFI = 5410 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5411 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5412 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5413 llvm::GlobalValue::InternalLinkage, Name, 5414 &CGM.getModule()); 5415 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5416 
Fn->setDoesNotRecurse(); 5417 CodeGenFunction CGF(CGM); 5418 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5419 5420 // Dst = (void*[n])(LHSArg); 5421 // Src = (void*[n])(RHSArg); 5422 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5423 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5424 ArgsType), CGF.getPointerAlign()); 5425 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5426 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5427 ArgsType), CGF.getPointerAlign()); 5428 5429 // ... 5430 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5431 // ... 5432 CodeGenFunction::OMPPrivateScope Scope(CGF); 5433 auto IPriv = Privates.begin(); 5434 unsigned Idx = 0; 5435 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5436 const auto *RHSVar = 5437 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5438 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5439 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5440 }); 5441 const auto *LHSVar = 5442 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5443 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5444 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5445 }); 5446 QualType PrivTy = (*IPriv)->getType(); 5447 if (PrivTy->isVariablyModifiedType()) { 5448 // Get array size and emit VLA type. 
5449 ++Idx; 5450 Address Elem = 5451 CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize()); 5452 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5453 const VariableArrayType *VLA = 5454 CGF.getContext().getAsVariableArrayType(PrivTy); 5455 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5456 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5457 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5458 CGF.EmitVariablyModifiedType(PrivTy); 5459 } 5460 } 5461 Scope.Privatize(); 5462 IPriv = Privates.begin(); 5463 auto ILHS = LHSExprs.begin(); 5464 auto IRHS = RHSExprs.begin(); 5465 for (const Expr *E : ReductionOps) { 5466 if ((*IPriv)->getType()->isArrayType()) { 5467 // Emit reduction for array section. 5468 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5469 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5470 EmitOMPAggregateReduction( 5471 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5472 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5473 emitReductionCombiner(CGF, E); 5474 }); 5475 } else { 5476 // Emit reduction for array subscript or single variable. 5477 emitReductionCombiner(CGF, E); 5478 } 5479 ++IPriv; 5480 ++ILHS; 5481 ++IRHS; 5482 } 5483 Scope.ForceCleanup(); 5484 CGF.FinishFunction(); 5485 return Fn; 5486 } 5487 5488 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5489 const Expr *ReductionOp, 5490 const Expr *PrivateRef, 5491 const DeclRefExpr *LHS, 5492 const DeclRefExpr *RHS) { 5493 if (PrivateRef->getType()->isArrayType()) { 5494 // Emit reduction for array section. 
const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

/// Emits the code for an OpenMP 'reduction' clause: builds the reduction
/// list, the outlined reduce_func, and the __kmpc_reduce{_nowait} dispatch
/// switch, or only the plain combiners when Options.SimpleReduction is set.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  // ...
  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  // ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination needed; emit the combiners inline.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem =
        CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size in the extra slot reserved for this VLA item,
      // smuggled through the void* array as an int-to-pointer cast.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
                                             CGF.getPointerSize());
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Value *ReductionFn = emitReductionFunction(
      CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The CommonActionTy ensures __kmpc_end_reduce{_nowait} is emitted after
  // the combiners, even on early exits out of the region.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Pattern-match 'x = <update>(x, e)' so it can be lowered as a simple
      // atomic update; anything else falls back to a critical region below.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Re-evaluate the update with the lhs variable remapped to a
                // temporary holding the previously loaded atomic value.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  // Local variables/parameters have no mangled name, so fall back to the
  // plain identifier for them.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  // Append the raw begin-location encoding to disambiguate shadowed names.
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return Out.str();
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No custom initializer: a null placeholder is enough.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  // Two opaque parameters: in/out accumulator and incoming value.
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr when the reduction item needs no cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction();
  return Fn;
}

/// Emits the runtime initialization for task reductions: builds the
/// kmp_task_red_input_t descriptor array and calls
/// __kmpc_task_reduction_init. Returns the taskgroup reduction descriptor.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  // Fill one descriptor element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // A null finalizer tells the runtime no cleanup is needed for this item.
    llvm::Value *FiniAddr =
        Fini ? CGF.EmitCastToVoidPtr(Fini)
             : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag value 1 requests lazy (per-thread, on-demand) item creation.
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}

/// Stores the per-item runtime bookkeeping (non-constant size and/or the
/// address of the original shared item) into artificial threadprivate
/// variables so the outlined init/comb/fini functions can read them.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type size is non-constant
  // (Sizes.second is the non-null runtime size value in that case).
if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
  // Store address of the original reduction item if custom initializer is used.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().VoidPtrTy,
        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
        SharedAddr, /*IsVolatile=*/false);
  }
}

/// Returns the address of the task-private copy of a reduction item by
/// querying the runtime with the taskgroup descriptor and shared address.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      ReductionsPtr,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
                                                      CGM.VoidPtrTy)};
  // The returned pointer keeps the alignment of the original shared item.
  return Address(
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
      SharedLVal.getAlignment());
}

/// Emits code for the 'taskwait' directive.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Ignore return result until untied tasks are supported.
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits the body of a directive that does not require outlining, inside an
/// inlined OpenMP region.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Cancellation kind constants understood by the __kmpc_cancel* runtime
/// entry points.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps an OpenMP cancel-region directive kind to the runtime's cancellation
/// kind constant.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

/// Emits code for the 'cancellation point' directive.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

/// Emits code for the 'cancel' directive, optionally guarded by an 'if'
/// clause condition.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

/// Emits the outlined function for a 'target' directive; delegates to the
/// common helper below.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region.
  // The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    // Device and file IDs are emitted in hex; the line number in decimal.
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // On the host the ID is just a uniquely-named constant byte; the runtime
    // matches it against the offload entry table.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// discard all CompoundStmts intervening between two constructs
static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
  while (const auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
    Body = CS->body_front();

  return Body;
}

/// Emit the number of teams for a target directive. Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
                               CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
                                              "teams directive expected to be "
                                              "emitted only for the host!");

  CGBuilderTy &Bld = CGF.Builder;

  // If the target directive is combined with a teams directive:
  //   Return the value in the num_teams clause, if any.
  //   Otherwise, return 0 to denote the runtime default.
  if (isOpenMPTeamsDirective(D.getDirectiveKind())) {
    if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(),
                                                 /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
                               /*IsSigned=*/true);
    }

    // The default value is 0 (runtime chooses the number of teams).
    return Bld.getInt32(0);
  }

  // If the target directive is combined with a parallel directive but not a
  // teams directive, start one team.
  if (isOpenMPParallelDirective(D.getDirectiveKind()))
    return Bld.getInt32(1);

  // If the current target region has a teams region enclosed, we need to get
  // the number of teams to pass to the runtime function call. This is done
  // by generating the expression in a inlined region. This is required because
  // the expression is captured in the enclosing target environment when the
  // teams directive is not combined with target.

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
          ignoreCompoundStmts(CS.getCapturedStmt()))) {
    if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
      if (const auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
        // Emit the captured num_teams expression in an inlined region so the
        // captured variables resolve against the target environment.
        CGOpenMPInnerExprInfo CGInfo(CGF, CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
        return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
                                 /*IsSigned=*/true);
      }

      // If we have an enclosed teams directive but no num_teams clause we use
      // the default value 0.
      return Bld.getInt32(0);
    }
  }

  // No teams associated with the directive.
  return nullptr;
}

/// Emit the number of threads for a target directive. Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
                                 CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
                                              "teams directive expected to be "
                                              "emitted only for the host!");

  CGBuilderTy &Bld = CGF.Builder;

  //
  // If the target directive is combined with a teams directive:
  //   Return the value in the thread_limit clause, if any.
  //
  // If the target directive is combined with a parallel directive:
  //   Return the value in the num_threads clause, if any.
  //
  // If both clauses are set, select the minimum of the two.
  //
  // If neither teams or parallel combined directives set the number of threads
  // in a team, return 0 to denote the runtime default.
  //
  // If this is not a teams directive return nullptr.
  if (isOpenMPTeamsDirective(D.getDirectiveKind()) ||
      isOpenMPParallelDirective(D.getDirectiveKind())) {
    llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0);
    llvm::Value *NumThreadsVal = nullptr;
    llvm::Value *ThreadLimitVal = nullptr;

    if (const auto *ThreadLimitClause =
            D.getSingleClause<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      llvm::Value *ThreadLimit =
          CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(),
                             /*IgnoreResultAssign*/ true);
      ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty,
                                         /*IsSigned=*/true);
    }

    if (const auto *NumThreadsClause =
            D.getSingleClause<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      llvm::Value *NumThreads =
          CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                             /*IgnoreResultAssign*/ true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true);
    }

    // Select the lesser of thread_limit and num_threads (signed compare).
    if (NumThreadsVal)
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;

    // Set default value passed to the runtime if either teams or a target
    // parallel type directive is found but no clause is specified.
    if (!ThreadLimitVal)
      ThreadLimitVal = DefaultThreadLimitVal;

    return ThreadLimitVal;
  }

  // If the current target region has a teams region enclosed, we need to get
  // the thread limit to pass to the runtime function call. This is done
  // by generating the expression in a inlined region. This is required because
  // the expression is captured in the enclosing target environment when the
  // teams directive is not combined with target.

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
          ignoreCompoundStmts(CS.getCapturedStmt()))) {
    if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
      if (const auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
        // Emit the captured thread_limit expression in an inlined region so
        // the captured variables resolve against the target environment.
        CGOpenMPInnerExprInfo CGInfo(CGF, CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
        return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
                                         /*IsSigned=*/true);
      }

      // If we have an enclosed teams directive but no thread_limit clause we
      // use the default value 0.
      return CGF.Builder.getInt32(0);
    }
  }

  // No teams associated with the directive.
  return nullptr;
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. These must stay in sync with the flag values expected by
  /// the offloading runtime (tgt_map_type).
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    // Dereference yields the raw base pointer value.
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;

  /// Map between a struct and the its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
  };

private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType),
          MapTypeModifier(MapTypeModifier),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };

  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };

  /// Directive from where the map clauses were extracted.
  const OMPExecutableDirective &CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Compute the size, in bytes, of the entity designated by \a E, as an
  /// llvm::Value. Array sections are sized from their section length.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression, that means we
      // are using the whole length of the base.
      if (!OAE->getLength() && OAE->getColonLoc().isValid())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength())
        return ElemSize;

      // size = length * sizeof(element); the multiply cannot wrap (NUW).
      llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
      LengthVal =
          CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
      return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(OpenMPMapClauseKind MapType,
                                           OpenMPMapClauseKind MapTypeModifier,
                                           bool IsImplicit, bool AddPtrFlag,
                                           bool AddIsTargetParamFlag) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release is the default behavior in the runtime library, i.e.
      // if we don't pass any bits alloc/release that is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_always:
    case OMPC_MAP_unknown:
      // 'always' is only valid as a modifier, never as the map type itself.
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (MapTypeModifier == OMPC_MAP_always)
      Bits |= OMP_MAP_ALWAYS;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section, is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refer to a single element.
    if (OASE->getColonLoc().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happen to be unity size, that's user fault.
      return true;
    }

    // Check if the length evaluates to 1.
    llvm::APSInt ConstLength;
    if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
      return true; // Can have more that size 1.

    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    bool IsLink = false; // Is this variable a "declare target link"?

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();

    if (isa<MemberExpr>(I->getAssociatedExpression())) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
          if (*Res == OMPDeclareTargetDeclAttr::MT_Link) {
            // 'declare target link' variables are mapped through their link
            // pointer rather than the variable itself.
            IsLink = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
          }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1)   (2)  (3)  (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      if (Next == CE || IsPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB =
            CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointer =
            IsPointer && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1, CharUnits::One());
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapTypeModifier, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress();
                // Size of the gap before this overlapped element, computed as
                // a pointer difference in bytes (both sides cast to i8*).
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(Size);
            Types.push_back(Flags);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1,
                                            CGF.getPointerSize());
          }
          // Emit the trailing chunk from the last overlapped element to the
          // end of the struct.
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1, CharUnits::One())
                      .getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(Size);
          Types.push_back(Flags);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointer) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(Size);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapTypeModifier, IsImplicit,
              !IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
7196 if (EncounteredME) { 7197 const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl()); 7198 unsigned FieldIndex = FD->getFieldIndex(); 7199 7200 // Update info about the lowest and highest elements for this struct 7201 if (!PartialStruct.Base.isValid()) { 7202 PartialStruct.LowestElem = {FieldIndex, LB}; 7203 PartialStruct.HighestElem = {FieldIndex, LB}; 7204 PartialStruct.Base = BP; 7205 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7206 PartialStruct.LowestElem = {FieldIndex, LB}; 7207 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7208 PartialStruct.HighestElem = {FieldIndex, LB}; 7209 } 7210 } 7211 7212 // If we have a final array section, we are done with this expression. 7213 if (IsFinalArraySection) 7214 break; 7215 7216 // The pointer becomes the base for the next element. 7217 if (Next != CE) 7218 BP = LB; 7219 7220 IsExpressionFirstInfo = false; 7221 IsCaptureFirstInfo = false; 7222 } 7223 } 7224 } 7225 7226 /// Return the adjusted map modifiers if the declaration a capture refers to 7227 /// appears in a first-private clause. This is expected to be used only with 7228 /// directives that start with 'target'. 7229 MappableExprsHandler::OpenMPOffloadMappingFlags 7230 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 7231 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 7232 7233 // A first private variable captured by reference will use only the 7234 // 'private ptr' and 'map to' flag. Return the right flags if the captured 7235 // declaration is known as first-private in this handler. 
    // Captured by reference and known to be firstprivate: pass a private
    // pointer and only copy the data to the device ('private ptr' + 'to').
    if (FirstPrivateDecls.count(Cap.getCapturedVar()))
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    // Otherwise the default for a by-reference capture is a full 'tofrom'.
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  /// Return the MEMBER_OF flag encoding \a Position (the 0-based index of the
  /// parent entry) in the 16 most significant bits of the map-type bitfield.
  /// The stored value is Position + 1 so that 0 can mean "no MEMBER_OF".
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Member of is given by the 16 MSB of the flag, so rotate by 48 bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << 48);
  }

  /// Replace the MEMBER_OF placeholder in \a Flags with the concrete
  /// \a MemberOfFlag value, if the entry is eligible.
  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of
    // the proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

  /// Flatten the record \a RD into \a Layout: non-virtual bases, then virtual
  /// bases, then fields, in LLVM field order, recursing into base classes.
  /// \param AsBase When true, use the base-subobject LLVM type (the layout of
  ///        RD when embedded as a base class) instead of the complete type.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    // One slot per LLVM struct element; each slot holds either a base class
    // or a field declaration, placed at its LLVM field index.
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases (no storage of their own in the layout).
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // A virtual base may share an index with an already-recorded entry;
      // keep the first occupant.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField()) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Emit the collected entries in layout order, expanding bases recursively.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

public:
  /// Record the firstprivate and is_device_ptr clauses of \a Dir so later
  /// map-info generation can adjust flags and skip device pointers.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.insert(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[L.first].push_back(L.second);
  }

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  /// \param CurTypes Flags of the entries already emitted for this capture;
  ///        they are updated in place to be MEMBER_OF the new combined entry.
  void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
                         MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                         MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct) const {
    // Base is the base of the struct.
    BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element.
    llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
    Pointers.push_back(LB);
    // Size is (addr of {highest+1} element) - (addr of lowest element).
    llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.SizeTy,
                                                  /*isSigned=*/false);
    Sizes.push_back(Size);
    // Map type is always TARGET_PARAM for the combined entry.
    Types.push_back(OMP_MAP_TARGET_PARAM);
    // Remove TARGET_PARAM flag from the first element: the combined entry
    // becomes the argument passed to the runtime instead.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions. Also, for each item that
  /// relates with a device pointer, a pair of the relevant declaration and
  /// index where it occurs is appended to the device pointers info array.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. A null declaration keys entries for 'this'.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
        bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer,
                            IsImplicit);
    };

    // Collect component lists from map/to/from clauses. 'to'/'from' clauses
    // (target update) are modeled as maps of the corresponding kind.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C :
         this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto &L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        // FIXME: MSVC 2013 seems to require this-> to find member CGF.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, OMPC_MAP_unknown,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD);
        } else {
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays, merged into the output arrays only
      // after the whole capture (including a possible combined struct entry)
      // has been processed.
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        // FIXME: MSVC 2013 seems to require this-> to find the member method.
        this->generateInfoForComponentList(
            L.MapType, L.MapTypeModifier, L.Components, CurBasePointers,
            CurPointers, CurSizes, CurTypes, PartialStruct,
            IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                             OMP_MAP_MEMBER_OF);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }

  /// Emit capture info for lambdas for variables captured by reference.
  /// Records a PTR_AND_OBJ entry for 'this' (if captured) and for every
  /// by-reference capture, and notes field->lambda pointers in
  /// \a LambdaPointers for later MEMBER_OF fix-up.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    // Only lambda closures get this special treatment.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(ThisLVal.getPointer());
      Pointers.push_back(ThisLValVal.getPointer());
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (LC.getCaptureKind() != LCK_ByRef)
        continue;
      // NOTE: shadows the outer VD parameter; refers to the captured variable.
      const VarDecl *VD = LC.getCapturedVar();
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
      LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(VarLVal.getPointer());
      Pointers.push_back(VarLValVal.getPointer());
      Sizes.push_back(CGF.getTypeSize(
          VD->getType().getCanonicalType().getNonReferenceType()));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }

  /// Set correct MEMBER_OF indices for lambda captures previously emitted by
  /// generateInfoForLambdaCaptures, by locating the entry of the enclosing
  /// lambda object in the arrays.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures. The exact
      // flag combination identifies entries emitted for lambda captures.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      // Search backwards for the entry whose pointer is the lambda object.
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // Use the index of the parent lambda entry as the MEMBER_OF value
      // (PTR_AND_OBJ entries without the placeholder are left untouched by
      // setCorrectMemberOfFlag).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component. A 'this' capture is keyed with a null declaration.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // (components, map type, map-type modifier, is-implicit) per map clause
    // entry that mentions this declaration.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, OpenMPMapClauseKind, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifier(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // Maps the component list that is a prefix (the "base") to the component
    // lists that extend it (the overlapped sub-elements).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    // Compare every pair of component lists once (L1 ranges over the lists
    // after L).
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      OpenMPMapClauseKind MapTypeModifier;
      bool IsImplicit;
      std::tie(Components, MapType, MapTypeModifier, IsImplicit) = L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapTypeModifier, IsImplicit) = L1;
        // Walk both lists from the base (reverse order) while they agree.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The shorter list is the base; the longer one is the sub-element.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item. The field order is taken
    // from the plain layout of the record (bases flattened first).
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Skip the common prefix of the two component lists.
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by the position of the first differing field.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // Fields of different records: whichever appears first in the
            // flattened layout is "less".
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // We need to know when we generate information for the first component
    // associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      OpenMPMapClauseKind MapTypeModifier;
      bool IsImplicit;
      std::tie(Components, MapType, MapTypeModifier, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapTypeModifier, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      OpenMPMapClauseKind MapTypeModifier;
      bool IsImplicit;
      std::tie(Components, MapType, MapTypeModifier, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapTypeModifier, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }

  /// Generate the base pointers, section pointers, sizes and map types
  /// associated with the declare target link variables.
  void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
                                        MapValuesArrayTy &Pointers,
                                        MapValuesArrayTy &Sizes,
                                        MapFlagsArrayTy &Types) const {
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->component_lists()) {
        if (!L.first)
          continue;
        const auto *VD = dyn_cast<VarDecl>(L.first);
        if (!VD)
          continue;
        // Only variables with the 'link' declare-target map type are handled
        // here; everything else is emitted through the capture path.
        llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
            OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
        if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
          continue;
        StructRangeInfoTy PartialStruct;
        generateInfoForComponentList(
            C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers,
            Pointers, Sizes, Types, PartialStruct,
            /*IsFirstComponentList=*/true, C->isImplicit());
        assert(!PartialStruct.Base.isValid() &&
               "No partial structs for declare target link expected.");
      }
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    // Do the default mapping.
    if (CI.capturesThis()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      // 'this' is captured as a pointer; map the pointee object.
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
      // Default map type for 'this' is 'tofrom'.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.getTypeSize(RI.getType()));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
      }
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);

      // Captured by reference: map the referenced object itself.
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.getTypeSize(ElementType));
      // By-reference captures use the firstprivate-adjusted flags: 'private
      // ptr' + 'to' when the variable is firstprivate, 'tofrom' otherwise
      // (see getMapModifiersForPrivateClauses).
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
    }
    // Every default map produces a single argument which is a target
    // parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
7893 static void 7894 emitOffloadingArrays(CodeGenFunction &CGF, 7895 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 7896 MappableExprsHandler::MapValuesArrayTy &Pointers, 7897 MappableExprsHandler::MapValuesArrayTy &Sizes, 7898 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 7899 CGOpenMPRuntime::TargetDataInfo &Info) { 7900 CodeGenModule &CGM = CGF.CGM; 7901 ASTContext &Ctx = CGF.getContext(); 7902 7903 // Reset the array information. 7904 Info.clearArrayInfo(); 7905 Info.NumberOfPtrs = BasePointers.size(); 7906 7907 if (Info.NumberOfPtrs) { 7908 // Detect if we have any capture size requiring runtime evaluation of the 7909 // size so that a constant array could be eventually used. 7910 bool hasRuntimeEvaluationCaptureSize = false; 7911 for (llvm::Value *S : Sizes) 7912 if (!isa<llvm::Constant>(S)) { 7913 hasRuntimeEvaluationCaptureSize = true; 7914 break; 7915 } 7916 7917 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 7918 QualType PointerArrayType = 7919 Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, 7920 /*IndexTypeQuals=*/0); 7921 7922 Info.BasePointersArray = 7923 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 7924 Info.PointersArray = 7925 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 7926 7927 // If we don't have any VLA types or other types that require runtime 7928 // evaluation, we can use a constant array for the map sizes, otherwise we 7929 // need to fill up the arrays as we do for the pointers. 7930 if (hasRuntimeEvaluationCaptureSize) { 7931 QualType SizeArrayType = Ctx.getConstantArrayType( 7932 Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, 7933 /*IndexTypeQuals=*/0); 7934 Info.SizesArray = 7935 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 7936 } else { 7937 // We expect all the sizes to be constant, so we collect them to create 7938 // a constant array. 
7939 SmallVector<llvm::Constant *, 16> ConstSizes; 7940 for (llvm::Value *S : Sizes) 7941 ConstSizes.push_back(cast<llvm::Constant>(S)); 7942 7943 auto *SizesArrayInit = llvm::ConstantArray::get( 7944 llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); 7945 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 7946 auto *SizesArrayGbl = new llvm::GlobalVariable( 7947 CGM.getModule(), SizesArrayInit->getType(), 7948 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 7949 SizesArrayInit, Name); 7950 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 7951 Info.SizesArray = SizesArrayGbl; 7952 } 7953 7954 // The map types are always constant so we don't need to generate code to 7955 // fill arrays. Instead, we create an array constant. 7956 SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0); 7957 llvm::copy(MapTypes, Mapping.begin()); 7958 llvm::Constant *MapTypesArrayInit = 7959 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 7960 std::string MaptypesName = 7961 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 7962 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 7963 CGM.getModule(), MapTypesArrayInit->getType(), 7964 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 7965 MapTypesArrayInit, MaptypesName); 7966 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 7967 Info.MapTypesArray = MapTypesArrayGbl; 7968 7969 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 7970 llvm::Value *BPVal = *BasePointers[I]; 7971 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 7972 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 7973 Info.BasePointersArray, 0, I); 7974 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 7975 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 7976 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 7977 CGF.Builder.CreateStore(BPVal, BPAddr); 7978 7979 if (Info.requiresDevicePointerInfo()) 7980 if (const ValueDecl 
*DevVD = BasePointers[I].getDevicePtrDecl()) 7981 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 7982 7983 llvm::Value *PVal = Pointers[I]; 7984 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 7985 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 7986 Info.PointersArray, 0, I); 7987 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 7988 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 7989 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 7990 CGF.Builder.CreateStore(PVal, PAddr); 7991 7992 if (hasRuntimeEvaluationCaptureSize) { 7993 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 7994 llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), 7995 Info.SizesArray, 7996 /*Idx0=*/0, 7997 /*Idx1=*/I); 7998 Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); 7999 CGF.Builder.CreateStore( 8000 CGF.Builder.CreateIntCast(Sizes[I], CGM.SizeTy, /*isSigned=*/true), 8001 SAddr); 8002 } 8003 } 8004 } 8005 } 8006 /// Emit the arguments to be passed to the runtime library based on the 8007 /// arrays of pointers, sizes and map types. 
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each array to a pointer to its first element.
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
  } else {
    // No mapped data: pass null pointers to the runtime.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
  }
}

/// Emit the offloading runtime invocation for the target directive \a D,
/// using \a OutlinedFnID to identify the device code and falling back to
/// calling the host version \a OutlinedFn when the offloading call fails or
/// when no device binary is available.
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Value *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A depend clause forces the target call to be wrapped in an outer task.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask,
                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(*this, CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(*this, CGF, D);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // Re-capture the variables in the context of the task region.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk the captures, the captured-record fields and the captured values
    // in lock step; the three sequences are parallel by construction.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.getTypeSize(RI->getType()));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

/// Recursively scan \a S for target directives and emit the corresponding
/// device functions, using \a ParentName as the prefix of the target-region
/// entry names.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device codegen entry point of the matching target
    // directive; every non-target directive kind is unreachable here because
    // RequiresDeviceCodegen already filtered for target execution directives.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  StringRef Name = CGM.getMangledName(GD);
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD))
    scanForTargetRegionsFunctions(FD->getBody(), Name);

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetFunctions.count(Name) == 0;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  // 'declare target to' variables are emitted later via
  // emitDeferredTargetDecls(); 'link' variables only get an indirection
  // pointer, so both are deferred here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

/// Register \a VD (with address \a Addr) in the offload entries table so the
/// runtime can associate host and device copies of the variable.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;
  switch (*Res) {
  case OMPDeclareTargetDeclAttr::MT_To:
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declarations contribute a zero-sized entry.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Create an artificial "<name>_ref" global that keeps the internal
      // variable alive via llvm.compiler.used.
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
    break;
  case OMPDeclareTargetDeclAttr::MT_Link:
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device the entry is the variable itself; no address is
      // registered.
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      // On the host register the indirection pointer created for the link
      // variable.
      VarName = getAddrOfDeclareTargetLink(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
    break;
  }
  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

/// Emit the 'declare target' variables whose emission was deferred by
/// emitTargetGlobalVariable().
void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To) {
      CGM.EmitGlobal(VD);
    } else {
      assert(*Res == OMPDeclareTargetDeclAttr::MT_Link &&
             "Expected to or link clauses.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
    }
  }
}

void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}

// RAII that temporarily disables the automatic marking of globals while
// emitting device code; the previous state is restored on destruction.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  StringRef Name = CGM.getMangledName(GD);
  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // insert() returns false in .second if the name was already present, i.e.
  // the function was emitted before.
  return !AlreadyEmittedTargetFunctions.insert(Name).second;
}

llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
  // If we have offloading in the current module, we need to emit the entries
  // now and register the offloading descriptor.
  createOffloadEntriesAndInfoMetadata();

  // Create and register the offloading binary descriptors. This is the main
  // entity that captures all the information about offloading in the current
  // compilation unit.
  return createOffloadingBinaryDescriptorRegistration();
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Value *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // A null clause expression means "no limit"; the runtime interprets zero
  // as the default.
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ?
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 8673 CGF.CGM.Int32Ty, /* isSigned = */ true) 8674 : CGF.Builder.getInt32(0); 8675 8676 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 8677 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 8678 ThreadLimitVal}; 8679 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 8680 PushNumTeamsArgs); 8681 } 8682 8683 void CGOpenMPRuntime::emitTargetDataCalls( 8684 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 8685 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 8686 if (!CGF.HaveInsertPoint()) 8687 return; 8688 8689 // Action used to replace the default codegen action and turn privatization 8690 // off. 8691 PrePostActionTy NoPrivAction; 8692 8693 // Generate the code for the opening of the data environment. Capture all the 8694 // arguments of the runtime call by reference because they are used in the 8695 // closing of the region. 8696 auto &&BeginThenGen = [this, &D, Device, &Info, 8697 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 8698 // Fill up the arrays with all the mapped variables. 8699 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 8700 MappableExprsHandler::MapValuesArrayTy Pointers; 8701 MappableExprsHandler::MapValuesArrayTy Sizes; 8702 MappableExprsHandler::MapFlagsArrayTy MapTypes; 8703 8704 // Get map clause information. 8705 MappableExprsHandler MCHandler(D, CGF); 8706 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 8707 8708 // Fill up the arrays and create the arguments. 
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any. Without a 'device' clause the runtime picks the
    // default device (OMP_DEVICEID_UNDEF).
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID, PointerNum, BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    // Info was filled in by BeginThenGen; the same arrays are passed to the
    // closing runtime call.
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID, PointerNum, BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

// Emits the runtime call for a standalone data-mapping directive ('target
// enter data', 'target exit data' or 'target update'), honoring the 'if',
// 'device', 'nowait' and 'depend' clauses.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment. InputInfo and
  // MapTypesArray are captured by reference; they are filled in by
  // TargetThenGen below before ThenGen runs.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive. All other directive kinds are listed explicitly so that
    // adding a new kind triggers a -Wswitch warning here.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  // Computes the offloading arrays from the map clauses and then emits the
  // directive either as a task (when 'depend' clauses are present) or inline.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    // If the 'if' clause is false nothing is emitted at all.
    emitOMPIfClause(CGF, IfCond, TargetThenGen,
                    [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
8965 struct ParamAttrTy { 8966 ParamKindTy Kind = Vector; 8967 llvm::APSInt StrideOrArg; 8968 llvm::APSInt Alignment; 8969 }; 8970 } // namespace 8971 8972 static unsigned evaluateCDTSize(const FunctionDecl *FD, 8973 ArrayRef<ParamAttrTy> ParamAttrs) { 8974 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 8975 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 8976 // of that clause. The VLEN value must be power of 2. 8977 // In other case the notion of the function`s "characteristic data type" (CDT) 8978 // is used to compute the vector length. 8979 // CDT is defined in the following order: 8980 // a) For non-void function, the CDT is the return type. 8981 // b) If the function has any non-uniform, non-linear parameters, then the 8982 // CDT is the type of the first such parameter. 8983 // c) If the CDT determined by a) or b) above is struct, union, or class 8984 // type which is pass-by-value (except for the type that maps to the 8985 // built-in complex data type), the characteristic data type is int. 8986 // d) If none of the above three cases is applicable, the CDT is int. 8987 // The VLEN is then determined based on the CDT and the size of vector 8988 // register of that ISA for which current vector version is generated. The 8989 // VLEN is computed using the formula below: 8990 // VLEN = sizeof(vector_register) / sizeof(CDT), 8991 // where vector register size specified in section 3.2.1 Registers and the 8992 // Stack Frame of original AMD64 ABI document. 
8993 QualType RetType = FD->getReturnType(); 8994 if (RetType.isNull()) 8995 return 0; 8996 ASTContext &C = FD->getASTContext(); 8997 QualType CDT; 8998 if (!RetType.isNull() && !RetType->isVoidType()) { 8999 CDT = RetType; 9000 } else { 9001 unsigned Offset = 0; 9002 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 9003 if (ParamAttrs[Offset].Kind == Vector) 9004 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 9005 ++Offset; 9006 } 9007 if (CDT.isNull()) { 9008 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 9009 if (ParamAttrs[I + Offset].Kind == Vector) { 9010 CDT = FD->getParamDecl(I)->getType(); 9011 break; 9012 } 9013 } 9014 } 9015 } 9016 if (CDT.isNull()) 9017 CDT = C.IntTy; 9018 CDT = CDT->getCanonicalTypeUnqualified(); 9019 if (CDT->isRecordType() || CDT->isUnionType()) 9020 CDT = C.IntTy; 9021 return C.getTypeSize(CDT); 9022 } 9023 9024 static void 9025 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 9026 const llvm::APSInt &VLENVal, 9027 ArrayRef<ParamAttrTy> ParamAttrs, 9028 OMPDeclareSimdDeclAttr::BranchStateTy State) { 9029 struct ISADataTy { 9030 char ISA; 9031 unsigned VecRegSize; 9032 }; 9033 ISADataTy ISAData[] = { 9034 { 9035 'b', 128 9036 }, // SSE 9037 { 9038 'c', 256 9039 }, // AVX 9040 { 9041 'd', 256 9042 }, // AVX2 9043 { 9044 'e', 512 9045 }, // AVX512 9046 }; 9047 llvm::SmallVector<char, 2> Masked; 9048 switch (State) { 9049 case OMPDeclareSimdDeclAttr::BS_Undefined: 9050 Masked.push_back('N'); 9051 Masked.push_back('M'); 9052 break; 9053 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 9054 Masked.push_back('N'); 9055 break; 9056 case OMPDeclareSimdDeclAttr::BS_Inbranch: 9057 Masked.push_back('M'); 9058 break; 9059 } 9060 for (char Mask : Masked) { 9061 for (const ISADataTy &Data : ISAData) { 9062 SmallString<256> Buffer; 9063 llvm::raw_svector_ostream Out(Buffer); 9064 Out << "_ZGV" << Data.ISA << Mask; 9065 if (!VLENVal) { 9066 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / 9067 
evaluateCDTSize(FD, ParamAttrs)); 9068 } else { 9069 Out << VLENVal; 9070 } 9071 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 9072 switch (ParamAttr.Kind){ 9073 case LinearWithVarStride: 9074 Out << 's' << ParamAttr.StrideOrArg; 9075 break; 9076 case Linear: 9077 Out << 'l'; 9078 if (!!ParamAttr.StrideOrArg) 9079 Out << ParamAttr.StrideOrArg; 9080 break; 9081 case Uniform: 9082 Out << 'u'; 9083 break; 9084 case Vector: 9085 Out << 'v'; 9086 break; 9087 } 9088 if (!!ParamAttr.Alignment) 9089 Out << 'a' << ParamAttr.Alignment; 9090 } 9091 Out << '_' << Fn->getName(); 9092 Fn->addFnAttr(Out.str()); 9093 } 9094 } 9095 } 9096 9097 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 9098 llvm::Function *Fn) { 9099 ASTContext &C = CGM.getContext(); 9100 FD = FD->getMostRecentDecl(); 9101 // Map params to their positions in function decl. 9102 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 9103 if (isa<CXXMethodDecl>(FD)) 9104 ParamPositions.try_emplace(FD, 0); 9105 unsigned ParamPos = ParamPositions.size(); 9106 for (const ParmVarDecl *P : FD->parameters()) { 9107 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 9108 ++ParamPos; 9109 } 9110 while (FD) { 9111 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 9112 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 9113 // Mark uniform parameters. 9114 for (const Expr *E : Attr->uniforms()) { 9115 E = E->IgnoreParenImpCasts(); 9116 unsigned Pos; 9117 if (isa<CXXThisExpr>(E)) { 9118 Pos = ParamPositions[FD]; 9119 } else { 9120 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 9121 ->getCanonicalDecl(); 9122 Pos = ParamPositions[PVD]; 9123 } 9124 ParamAttrs[Pos].Kind = Uniform; 9125 } 9126 // Get alignment info. 
9127 auto NI = Attr->alignments_begin(); 9128 for (const Expr *E : Attr->aligneds()) { 9129 E = E->IgnoreParenImpCasts(); 9130 unsigned Pos; 9131 QualType ParmTy; 9132 if (isa<CXXThisExpr>(E)) { 9133 Pos = ParamPositions[FD]; 9134 ParmTy = E->getType(); 9135 } else { 9136 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 9137 ->getCanonicalDecl(); 9138 Pos = ParamPositions[PVD]; 9139 ParmTy = PVD->getType(); 9140 } 9141 ParamAttrs[Pos].Alignment = 9142 (*NI) 9143 ? (*NI)->EvaluateKnownConstInt(C) 9144 : llvm::APSInt::getUnsigned( 9145 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 9146 .getQuantity()); 9147 ++NI; 9148 } 9149 // Mark linear parameters. 9150 auto SI = Attr->steps_begin(); 9151 auto MI = Attr->modifiers_begin(); 9152 for (const Expr *E : Attr->linears()) { 9153 E = E->IgnoreParenImpCasts(); 9154 unsigned Pos; 9155 if (isa<CXXThisExpr>(E)) { 9156 Pos = ParamPositions[FD]; 9157 } else { 9158 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 9159 ->getCanonicalDecl(); 9160 Pos = ParamPositions[PVD]; 9161 } 9162 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 9163 ParamAttr.Kind = Linear; 9164 if (*SI) { 9165 if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C, 9166 Expr::SE_AllowSideEffects)) { 9167 if (const auto *DRE = 9168 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 9169 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 9170 ParamAttr.Kind = LinearWithVarStride; 9171 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 9172 ParamPositions[StridePVD->getCanonicalDecl()]); 9173 } 9174 } 9175 } 9176 } 9177 ++SI; 9178 ++MI; 9179 } 9180 llvm::APSInt VLENVal; 9181 if (const Expr *VLEN = Attr->getSimdlen()) 9182 VLENVal = VLEN->EvaluateKnownConstInt(C); 9183 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 9184 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 9185 CGM.getTriple().getArch() == llvm::Triple::x86_64) 9186 emitX86DeclareSimdFunction(FD, Fn, VLENVal, 
ParamAttrs, State); 9187 } 9188 FD = FD->getPreviousDecl(); 9189 } 9190 } 9191 9192 namespace { 9193 /// Cleanup action for doacross support. 9194 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 9195 public: 9196 static const int DoacrossFinArgs = 2; 9197 9198 private: 9199 llvm::Value *RTLFn; 9200 llvm::Value *Args[DoacrossFinArgs]; 9201 9202 public: 9203 DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs) 9204 : RTLFn(RTLFn) { 9205 assert(CallArgs.size() == DoacrossFinArgs); 9206 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 9207 } 9208 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 9209 if (!CGF.HaveInsertPoint()) 9210 return; 9211 CGF.EmitRuntimeCall(RTLFn, Args); 9212 } 9213 }; 9214 } // namespace 9215 9216 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 9217 const OMPLoopDirective &D, 9218 ArrayRef<Expr *> NumIterations) { 9219 if (!CGF.HaveInsertPoint()) 9220 return; 9221 9222 ASTContext &C = CGM.getContext(); 9223 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 9224 RecordDecl *RD; 9225 if (KmpDimTy.isNull()) { 9226 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 9227 // kmp_int64 lo; // lower 9228 // kmp_int64 up; // upper 9229 // kmp_int64 st; // stride 9230 // }; 9231 RD = C.buildImplicitRecord("kmp_dim"); 9232 RD->startDefinition(); 9233 addFieldToRecordDecl(C, RD, Int64Ty); 9234 addFieldToRecordDecl(C, RD, Int64Ty); 9235 addFieldToRecordDecl(C, RD, Int64Ty); 9236 RD->completeDefinition(); 9237 KmpDimTy = C.getRecordType(RD); 9238 } else { 9239 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 9240 } 9241 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 9242 QualType ArrayTy = 9243 C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0); 9244 9245 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 9246 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 9247 enum { LowerFD = 0, UpperFD, StrideFD }; 9248 // Fill dims 
  // with data: lower bound stays zero-initialized, upper bound is the
  // iteration count, stride is 1.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal =
        CGF.MakeAddrLValue(CGF.Builder.CreateConstArrayGEP(
                               DimsAddr, I, C.getTypeSizeInChars(KmpDimTy)),
                           KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal =
        CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
                                 D.getNumIterations()->getType(), Int64Ty,
                                 D.getNumIterations()->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder
              .CreateConstArrayGEP(DimsAddr, 0, C.getTypeSizeInChars(KmpDimTy))
              .getPointer(),
          CGM.VoidPtrTy)};

  llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register a cleanup so __kmpc_doacross_fini is emitted on both normal and
  // EH exits from the region.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

// Emits __kmpc_doacross_post for 'depend(source)' or __kmpc_doacross_wait for
// 'depend(sink : vec)', passing the iteration vector widened to kmp_int64.
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  // Store each loop counter into the temporary iteration-vector array.
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(
        CntVal,
        CGF.Builder.CreateConstArrayGEP(
            CntAddr, I, CGM.getContext().getTypeSizeInChars(Int64Ty)),
        /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder
          .CreateConstArrayGEP(CntAddr, 0,
                               CGM.getContext().getTypeSizeInChars(Int64Ty))
          .getPointer()};
  llvm::Value *RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

// Emits a call to \a Callee under an artificial debug location, using the
// nounwind form when the callee is known not to throw.
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::Value *Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee)) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

// By default a captured parameter is addressed directly; targets that use a
// distinct representation for target-side parameters override this.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

// The base runtime performs no special allocation for local variables.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  return Address::invalid();
}

//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime: runtime used in SIMD-only mode. Only simd-related
// constructs are expected to reach codegen, so most entry points below are
// unreachable by construction.
//===----------------------------------------------------------------------===//

llvm::Value *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Value *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Value *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Reductions are supported in SIMD-only mode, but only in the simple form;
// delegate to the base implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         llvm::Value *OutlinedFn,
                                         llvm::Value *OutlinedFnID,
                                         const Expr *IfCond, const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// No globals require device codegen in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

// No offloading descriptor registration is needed in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
  return nullptr;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Value *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}