//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;

namespace {
/// \brief Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// \brief Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// \brief Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// \brief Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// \brief Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// \brief Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions that have an associated captured statement.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions without an associated captured statement
  /// (used by inlined regions, which reuse the enclosing context).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// \brief Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// \brief Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Hook for emitting a task-resumption switching point; a no-op by
  /// default, overridden for untied task regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  /// Code generation callback for the region body (stored by value).
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// \brief API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// \brief A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// \brief API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Action that emits the dispatch machinery needed to resume an untied
  /// task at the correct part after it has been rescheduled.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point: load the current part id and switch on
        // it; case 0 falls through to the first part of the task body.
        auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        auto *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one task-switching point: store the next part id, run the
    /// user-provided UntiedCodeGen hook, return to the scheduler, and add the
    /// resumption block as a new case of the dispatch switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// \brief Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// \brief A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// \brief API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  /// \brief Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// \brief Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// \brief Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// \brief CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// \brief API for captured statement code generation in OpenMP target
/// constructs.
/// For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// \brief This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

/// Placeholder code-generation callback; reaching it at runtime is a bug.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// \brief API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
        return CGF.EmitLValue(&DRE).getAddress();
      });
    }
    (void)PrivScope.Privatize();
  }

  /// \brief Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// \brief Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// \brief RAII for emitting code of OpenMP constructs.
/// Saves and clears the CodeGenFunction's lambda/block capture state on entry
/// and restores it (and the previous CapturedStmtInfo) on destruction.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// \brief Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};

/// \brief Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// \brief Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// \brief Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// \brief Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// \brief Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// \brief Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// \brief Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// \brief Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// \brief Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

/// \brief Describes ident structure that describes a source location.
/// All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// \brief might be used in Fortran
  IdentField_Reserved_1,
  /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// \brief Not really used in Fortran any more
  IdentField_Reserved_2,
  /// \brief Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// \brief String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// \brief dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// IDs for the entry points of the OpenMP host runtime (libomp, __kmpc_*)
/// and the offloading runtime (libomptarget, __tgt_*) that this file's code
/// generation may emit calls to.
enum OpenMPRTLFunction {
  /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// \brief Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,

  //
  // Offloading related calls
  //
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  /// Run the action's Exit hook when the cleanup fires (on both the normal
  /// and the EH path; see the NormalAndEHCleanup push below).
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

/// Invoke the stored codegen callback inside its own cleanups scope; if a
/// pre/post action is attached, its Exit hook is registered as a cleanup so
/// it runs even on exceptional exit.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction. Returns null when \p ReductionOp does not
/// match the CallExpr(OpaqueValueExpr -> DeclRefExpr -> UDR decl) pattern.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

/// Emit the initialization of a user-defined-reduction private copy: if \p DRD
/// has an initializer, evaluate \p InitOp with the UDR's LHS/RHS placeholders
/// privatized to \p Private / \p Original; otherwise copy a zero-initialized
/// value of type \p Ty into \p Private.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    auto *CE = cast<CallExpr>(InitOp);
    auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Map the UDR placeholder variables onto the private/original storage
    // before evaluating the initializer expression.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() -> Address { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() -> Address { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer clause: default-initialize from a private global holding
    // the zero value of the type.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, ".init");
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// \brief Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // When a UDR is involved, walk the source array in lockstep with the
  // destination via a second PHI.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
893 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 894 } 895 896 static llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> 897 isDeclareTargetDeclaration(const ValueDecl *VD) { 898 for (const auto *D : VD->redecls()) { 899 if (!D->hasAttrs()) 900 continue; 901 if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>()) 902 return Attr->getMapType(); 903 } 904 return llvm::None; 905 } 906 907 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 908 return CGF.EmitOMPSharedLValue(E); 909 } 910 911 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 912 const Expr *E) { 913 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 914 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 915 return LValue(); 916 } 917 918 void ReductionCodeGen::emitAggregateInitialization( 919 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 920 const OMPDeclareReductionDecl *DRD) { 921 // Emit VarDecl with copy init for arrays. 922 // Get the address of the original variable captured in current 923 // captured region. 924 auto *PrivateVD = 925 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 926 bool EmitDeclareReductionInit = 927 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 928 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 929 EmitDeclareReductionInit, 930 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 931 : PrivateVD->getInit(), 932 DRD, SharedLVal.getAddress()); 933 } 934 935 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 936 ArrayRef<const Expr *> Privates, 937 ArrayRef<const Expr *> ReductionOps) { 938 ClausesData.reserve(Shareds.size()); 939 SharedAddresses.reserve(Shareds.size()); 940 Sizes.reserve(Shareds.size()); 941 BaseDecls.reserve(Shareds.size()); 942 auto IPriv = Privates.begin(); 943 auto IRed = ReductionOps.begin(); 944 for (const auto *Ref : Shareds) { 945 ClausesData.emplace_back(Ref, *IPriv, *IRed); 946 std::advance(IPriv, 1); 947 std::advance(IRed, 1); 948 } 949 } 950 951 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { 952 assert(SharedAddresses.size() == N && 953 "Number of generated lvalues must be exactly N."); 954 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 955 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 956 SharedAddresses.emplace_back(First, Second); 957 } 958 959 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 960 auto *PrivateVD = 961 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 962 QualType PrivateType = PrivateVD->getType(); 963 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 964 if (!PrivateType->isVariablyModifiedType()) { 965 Sizes.emplace_back( 966 CGF.getTypeSize( 967 SharedAddresses[N].first.getType().getNonReferenceType()), 968 nullptr); 969 return; 970 } 971 llvm::Value *Size; 972 llvm::Value *SizeInChars; 973 llvm::Type *ElemType = 974 cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType()) 975 ->getElementType(); 976 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 977 if (AsArraySection) { 978 Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(), 979 SharedAddresses[N].first.getPointer()); 980 Size = CGF.Builder.CreateNUWAdd( 981 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 982 SizeInChars 
= CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 983 } else { 984 SizeInChars = CGF.getTypeSize( 985 SharedAddresses[N].first.getType().getNonReferenceType()); 986 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 987 } 988 Sizes.emplace_back(SizeInChars, Size); 989 CodeGenFunction::OpaqueValueMapping OpaqueMap( 990 CGF, 991 cast<OpaqueValueExpr>( 992 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 993 RValue::get(Size)); 994 CGF.EmitVariablyModifiedType(PrivateType); 995 } 996 997 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 998 llvm::Value *Size) { 999 auto *PrivateVD = 1000 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1001 QualType PrivateType = PrivateVD->getType(); 1002 if (!PrivateType->isVariablyModifiedType()) { 1003 assert(!Size && !Sizes[N].second && 1004 "Size should be nullptr for non-variably modified reduction " 1005 "items."); 1006 return; 1007 } 1008 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1009 CGF, 1010 cast<OpaqueValueExpr>( 1011 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1012 RValue::get(Size)); 1013 CGF.EmitVariablyModifiedType(PrivateType); 1014 } 1015 1016 void ReductionCodeGen::emitInitialization( 1017 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 1018 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 1019 assert(SharedAddresses.size() > N && "No variable was generated"); 1020 auto *PrivateVD = 1021 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1022 auto *DRD = getReductionInit(ClausesData[N].ReductionOp); 1023 QualType PrivateType = PrivateVD->getType(); 1024 PrivateAddr = CGF.Builder.CreateElementBitCast( 1025 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1026 QualType SharedType = SharedAddresses[N].first.getType(); 1027 SharedLVal = CGF.MakeAddrLValue( 1028 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(), 1029 CGF.ConvertTypeForMem(SharedType)), 1030 
SharedType, SharedAddresses[N].first.getBaseInfo(), 1031 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 1032 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 1033 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 1034 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 1035 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 1036 PrivateAddr, SharedLVal.getAddress(), 1037 SharedLVal.getType()); 1038 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 1039 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 1040 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 1041 PrivateVD->getType().getQualifiers(), 1042 /*IsInitializer=*/false); 1043 } 1044 } 1045 1046 bool ReductionCodeGen::needCleanups(unsigned N) { 1047 auto *PrivateVD = 1048 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1049 QualType PrivateType = PrivateVD->getType(); 1050 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1051 return DTorKind != QualType::DK_none; 1052 } 1053 1054 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 1055 Address PrivateAddr) { 1056 auto *PrivateVD = 1057 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1058 QualType PrivateType = PrivateVD->getType(); 1059 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1060 if (needCleanups(N)) { 1061 PrivateAddr = CGF.Builder.CreateElementBitCast( 1062 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1063 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 1064 } 1065 } 1066 1067 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1068 LValue BaseLV) { 1069 BaseTy = BaseTy.getNonReferenceType(); 1070 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1071 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1072 if (auto *PtrTy = BaseTy->getAs<PointerType>()) 1073 BaseLV = 
CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); 1074 else { 1075 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy); 1076 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 1077 } 1078 BaseTy = BaseTy->getPointeeType(); 1079 } 1080 return CGF.MakeAddrLValue( 1081 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(), 1082 CGF.ConvertTypeForMem(ElTy)), 1083 BaseLV.getType(), BaseLV.getBaseInfo(), 1084 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 1085 } 1086 1087 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1088 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 1089 llvm::Value *Addr) { 1090 Address Tmp = Address::invalid(); 1091 Address TopTmp = Address::invalid(); 1092 Address MostTopTmp = Address::invalid(); 1093 BaseTy = BaseTy.getNonReferenceType(); 1094 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1095 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1096 Tmp = CGF.CreateMemTemp(BaseTy); 1097 if (TopTmp.isValid()) 1098 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 1099 else 1100 MostTopTmp = Tmp; 1101 TopTmp = Tmp; 1102 BaseTy = BaseTy->getPointeeType(); 1103 } 1104 llvm::Type *Ty = BaseLVType; 1105 if (Tmp.isValid()) 1106 Ty = Tmp.getElementType(); 1107 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 1108 if (Tmp.isValid()) { 1109 CGF.Builder.CreateStore(Addr, Tmp); 1110 return MostTopTmp; 1111 } 1112 return Address(Addr, BaseLVAlignment); 1113 } 1114 1115 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 1116 const VarDecl *OrigVD = nullptr; 1117 if (auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 1118 auto *Base = OASE->getBase()->IgnoreParenImpCasts(); 1119 while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 1120 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 1121 while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1122 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1123 DE = 
cast<DeclRefExpr>(Base); 1124 OrigVD = cast<VarDecl>(DE->getDecl()); 1125 } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 1126 auto *Base = ASE->getBase()->IgnoreParenImpCasts(); 1127 while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1128 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1129 DE = cast<DeclRefExpr>(Base); 1130 OrigVD = cast<VarDecl>(DE->getDecl()); 1131 } 1132 return OrigVD; 1133 } 1134 1135 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1136 Address PrivateAddr) { 1137 const DeclRefExpr *DE; 1138 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1139 BaseDecls.emplace_back(OrigVD); 1140 auto OriginalBaseLValue = CGF.EmitLValue(DE); 1141 LValue BaseLValue = 1142 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1143 OriginalBaseLValue); 1144 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1145 BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); 1146 llvm::Value *PrivatePointer = 1147 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1148 PrivateAddr.getPointer(), 1149 SharedAddresses[N].first.getAddress().getType()); 1150 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1151 return castToBase(CGF, OrigVD->getType(), 1152 SharedAddresses[N].first.getType(), 1153 OriginalBaseLValue.getAddress().getType(), 1154 OriginalBaseLValue.getAlignment(), Ptr); 1155 } 1156 BaseDecls.emplace_back( 1157 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1158 return PrivateAddr; 1159 } 1160 1161 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1162 auto *DRD = getReductionInit(ClausesData[N].ReductionOp); 1163 return DRD && DRD->getInitializer(); 1164 } 1165 1166 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1167 return CGF.EmitLoadOfPointerLValue( 1168 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1169 getThreadIDVariable()->getType()->castAs<PointerType>()); 1170 } 

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  // Unlike parallel regions, the task thread-id variable holds the value
  // directly, so no pointer load is needed.
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OffloadEntriesInfoManager(CGM) {
  // IR mirror of the runtime's ident_t source-location descriptor.
  IdentTy = llvm::StructType::create(
      "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
      CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
      CGM.Int8PtrTy /* psource */);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
}

/// Emit the outlined ".omp_combiner." or ".omp_initializer." helper for a
/// user-defined reduction: void fn(Ty *omp_out, Ty *omp_in) (respectively
/// omp_priv/omp_orig for initializers). \p CombinerInitializer is the body
/// expression to emit; it may be null for initializers that only need the
/// default initialization of \p Out.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  auto &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  auto *Fn = llvm::Function::Create(
      FnTy, llvm::GlobalValue::InternalLinkage,
      IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // These helpers should always be inlined at their uses.
  Fn->removeFnAttr(llvm::Attribute::NoInline);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // For initializers, run the out-variable's own non-trivial default init
  // before the (optional) initializer expression.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit (once per module) the combiner and optional initializer functions for
/// the user-defined reduction \p D, caching them in UDRMap. If \p CGF is
/// given, the UDR is also recorded against the current function so it can be
/// invalidated in functionFinished().
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  auto &C = CGM.getContext();
  // Lazily fetch the identifiers used to look up the UDR's in/out variables.
  if (!In || !Out) {
    In = &C.Idents.get("omp_in");
    Out = &C.Idents.get("omp_out");
  }
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
      cast<VarDecl>(D->lookup(Out).front()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (auto *Init = D->getInitializer()) {
    if (!Priv || !Orig) {
      Priv = &C.Idents.get("omp_priv");
      Orig = &C.Idents.get("omp_orig");
    }
    // Only call-style initializers ("initializer(foo(...))") emit the
    // expression; direct-init forms rely on omp_priv's own initializer.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(D->lookup(Orig).front()),
        cast<VarDecl>(D->lookup(Priv).front()),
        /*IsCombiner=*/false);
  }
  UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Return the (combiner, initializer) pair for \p D, emitting it on demand.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

// Layout information for ident_t.
static CharUnits getIdentAlign(CodeGenModule &CGM) {
  return CGM.getPointerAlign();
}
static CharUnits getIdentSize(CodeGenModule &CGM) {
  // Four i32 fields (16 bytes) plus the trailing psource pointer.
  assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
  return CharUnits::fromQuantity(16) + CGM.getPointerSize();
}
static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
  // All the fields except the last are i32, so this works beautifully.
  return unsigned(Field) * CharUnits::fromQuantity(4);
}
static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
                                   IdentFieldIndex Field,
                                   const llvm::Twine &Name = "") {
  auto Offset = getOffsetOfIdentField(Field);
  return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
}

/// Outline the captured statement of a parallel/teams directive into a
/// function taking the global thread id pointer as its leading argument.
static llvm::Value *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Whether cancellation is possible depends on the exact directive flavor.
  bool HasCancel = false;
  if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (auto *OPFD = dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}

llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Callback used for untied tasks: re-enqueue the task descriptor through
  // __kmpc_omp_task.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    auto *ThreadID = getThreadID(CGF, D.getLocStart());
    auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  auto *CS = D.getCapturedStmt(Region);
  auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // Untied tasks are split into parts; report how many were generated.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

/// Return (creating and caching on first use) the module-wide default ident_t
/// object for the given \p Flags.
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = getIdentAlign(CGM);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    ConstantInitBuilder builder(CGM);
    auto fields = builder.beginStruct(IdentTy);
    fields.addInt(CGM.Int32Ty, 0);
    fields.addInt(CGM.Int32Ty, Flags);
    fields.addInt(CGM.Int32Ty, 0);
    fields.addInt(CGM.Int32Ty, 0);
    fields.add(DefaultOpenMPPSource);
    auto DefaultOpenMPLocation =
        fields.finishAndCreateGlobal("", Align, /*isConstant*/ true,
                                     llvm::GlobalValue::PrivateLinkage);
    DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}

/// Build (or reuse) a function-local ident_t describing \p Loc with its
/// psource field set to ";file;function;line;column;;" and return a pointer
/// to it, suitable for passing to __kmpc_* entry points.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
                                      ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Seed the alloca from the default location at the function entry so all
    // fields but psource are valid before any use.
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGM.getSize(getIdentSize(CGF.CGM)));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);

  auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const FunctionDecl *FD =
            dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
      OS2 << FD->getQualifiedNameAsString();
    }
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.Builder.CreateStore(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}

/// Get the global thread id for the current function, either from an outlined
/// region's thread-id argument or by calling __kmpc_global_thread_num(), and
/// cache it per function where safe.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
      !CGF.getLangOpts().CXXExceptions ||
      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    if (auto *OMPRegionInfo =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
      if (OMPRegionInfo->getThreadIDVariable()) {
        // Check if this an outlined function with thread id passed as argument.
        auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
  auto *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  Elem.second.ThreadID = Call;
  return Call;
}

/// Drop the per-function caches (location/thread-id entry and any UDRs
/// emitted for this function) once codegen for the function is done.
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn))
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for(auto *D : FunctionUDRMap[CGF.CurFn]) {
      UDRMap.erase(D);
    }
    FunctionUDRMap.erase(CGF.CurFn);
  }
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  // NOTE(review): this check is empty and IdentTy is unconditionally created
  // in the constructor — looks like dead code; confirm and remove.
  if (!IdentTy) {
  }
  return llvm::PointerType::getUnqual(IdentTy);
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
// Parameters of the microtask: the two kmp_int32* thread-id slots; the
// trailing varargs carry the captured variables of the parallel region.
llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                             llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

/// Returns (creating the declaration on first use) the specified OpenMP
/// runtime entry point. Each case builds the llvm::FunctionType matching the
/// libomp/libomptarget C signature quoted in the per-case comment and asks
/// CGM for a matching runtime-function declaration.
llvm::Constant *
CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
  llvm::Constant *RTLFn = nullptr;
  switch (static_cast<OpenMPRTLFunction>(Function)) {
  case OMPRTL__kmpc_fork_call: {
    // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
    // microtask, ...);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                getKmpc_MicroPointerTy()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
    break;
  }
  case OMPRTL__kmpc_global_thread_num: {
    // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
    break;
  }
  case OMPRTL__kmpc_threadprivate_cached: {
    // Build void *__kmpc_threadprivate_cached(ident_t *loc,
    // kmp_int32 global_tid, void *data, size_t size, void ***cache);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy, CGM.SizeTy,
                                CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
    break;
  }
  case OMPRTL__kmpc_critical: {
    // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
    break;
  }
  case OMPRTL__kmpc_critical_with_hint: {
    // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit, uintptr_t hint);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                llvm::PointerType::getUnqual(KmpCriticalNameTy),
                                CGM.IntPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
    break;
  }
  case OMPRTL__kmpc_threadprivate_register: {
    // Build void __kmpc_threadprivate_register(ident_t *, void *data,
    // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
    // typedef void *(*kmpc_ctor)(void *);
    auto KmpcCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                /*isVarArg*/ false)->getPointerTo();
    // typedef void *(*kmpc_cctor)(void *, void *);
    llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto KmpcCopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
                                /*isVarArg*/ false)->getPointerTo();
    // typedef void (*kmpc_dtor)(void *);
    auto KmpcDtorTy =
        llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
            ->getPointerTo();
    llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
                              KmpcCopyCtorTy, KmpcDtorTy};
    auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
                                        /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
    break;
  }
  case OMPRTL__kmpc_end_critical: {
    // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
    break;
  }
  case OMPRTL__kmpc_cancel_barrier: {
    // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
    break;
  }
  case OMPRTL__kmpc_barrier: {
    // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
    break;
  }
  case OMPRTL__kmpc_for_static_fini: {
    // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
    break;
  }
  case OMPRTL__kmpc_push_num_threads: {
    // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_threads)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
    break;
  }
  case OMPRTL__kmpc_serialized_parallel: {
    // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
    break;
  }
  case OMPRTL__kmpc_end_serialized_parallel: {
    // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
    break;
  }
  case OMPRTL__kmpc_flush: {
    // Build void __kmpc_flush(ident_t *loc);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
    break;
  }
  case OMPRTL__kmpc_master: {
    // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
    break;
  }
  case OMPRTL__kmpc_end_master: {
    // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
    break;
  }
  case OMPRTL__kmpc_omp_taskyield: {
    // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
    // int end_part);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
    break;
  }
  case OMPRTL__kmpc_single: {
    // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
    break;
  }
  case OMPRTL__kmpc_end_single: {
    // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
    break;
  }
  case OMPRTL__kmpc_omp_task_alloc: {
    // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
    // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
    // kmp_routine_entry_t *task_entry);
    assert(KmpRoutineEntryPtrTy != nullptr &&
           "Type kmp_routine_entry_t must be created.");
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
                                CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
    // Return void * and then cast to particular kmp_task_t type.
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
    break;
  }
  case OMPRTL__kmpc_omp_task: {
    // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
    // *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
    break;
  }
  case OMPRTL__kmpc_copyprivate: {
    // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
    // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
    // kmp_int32 didit);
    llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CpyFnTy =
        llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
                                CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
                                CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
    break;
  }
  case OMPRTL__kmpc_reduce: {
    // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
    // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
                                               /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
    break;
  }
  case OMPRTL__kmpc_reduce_nowait: {
    // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
    // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
    // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
    // *lck);
    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
                                               /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
    break;
  }
  case OMPRTL__kmpc_end_reduce: {
    // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
    break;
  }
  case OMPRTL__kmpc_end_reduce_nowait: {
    // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
    break;
  }
  case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
    // *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
    break;
  }
  case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
    // *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy,
                                      /*Name=*/"__kmpc_omp_task_complete_if0");
    break;
  }
  case OMPRTL__kmpc_ordered: {
    // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
    break;
  }
  case OMPRTL__kmpc_end_ordered: {
    // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
    break;
  }
  case OMPRTL__kmpc_omp_taskwait: {
    // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
    break;
  }
  case OMPRTL__kmpc_taskgroup: {
    // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
    break;
  }
  case OMPRTL__kmpc_end_taskgroup: {
    // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
    break;
  }
  case OMPRTL__kmpc_push_proc_bind: {
    // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
    // int proc_bind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
    break;
  }
  case OMPRTL__kmpc_omp_task_with_deps: {
    // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
    // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
        CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
    break;
  }
  case OMPRTL__kmpc_omp_wait_deps: {
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
    // kmp_depend_info_t *noalias_dep_list);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int32Ty, CGM.VoidPtrTy,
                                CGM.Int32Ty, CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
    break;
  }
  case OMPRTL__kmpc_cancellationpoint: {
    // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
    // global_tid, kmp_int32 cncl_kind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
    break;
  }
  case OMPRTL__kmpc_cancel: {
    // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 cncl_kind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
    break;
  }
  case OMPRTL__kmpc_push_num_teams: {
    // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
    // kmp_int32 num_teams, kmp_int32 num_threads)
    // NOTE(review): comment says 'void' but the declaration is built with an
    // i32 return type — confirm against the runtime's kmp.h declaration.
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
                                CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
    break;
  }
  case OMPRTL__kmpc_fork_teams: {
    // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
    // microtask, ...);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                getKmpc_MicroPointerTy()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
    break;
  }
  case OMPRTL__kmpc_taskloop: {
    // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
    // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
    // sched, kmp_uint64 grainsize, void *task_dup);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
                                CGM.IntTy,
                                CGM.VoidPtrTy,
                                CGM.IntTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty,
                                CGM.IntTy,
                                CGM.IntTy,
                                CGM.Int64Ty,
                                CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
    break;
  }
  case OMPRTL__kmpc_doacross_init: {
    // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
    // num_dims, struct kmp_dim *dims);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
                                CGM.Int32Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
    break;
  }
  case OMPRTL__kmpc_doacross_fini: {
    // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
    break;
  }
  case OMPRTL__kmpc_doacross_post: {
    // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
    // *vec);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int64Ty->getPointerTo()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
    break;
  }
  case OMPRTL__kmpc_doacross_wait: {
    // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
    // *vec);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int64Ty->getPointerTo()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
    break;
  }
  case OMPRTL__kmpc_task_reduction_init: {
    // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
    // *data);
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
    break;
  }
  case OMPRTL__kmpc_task_reduction_get_th_data: {
    // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
    // *d);
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(
        FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
    break;
  }
  case OMPRTL__tgt_target: {
    // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
    // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
    break;
  }
  case OMPRTL__tgt_target_nowait: {
    // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
    // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
    // int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
    break;
  }
  case OMPRTL__tgt_target_teams: {
    // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
    // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
    // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int32Ty,
                                CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
    break;
  }
  case OMPRTL__tgt_target_teams_nowait: {
    // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
    // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
    // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int32Ty,
                                CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
    break;
  }
  case OMPRTL__tgt_register_lib: {
    // Build void __tgt_register_lib(__tgt_bin_desc *desc);
    // NOTE(review): declaration built with i32 return despite the 'void'
    // comment — confirm against omptarget.h.
    QualType ParamTy =
        CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
    llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
    break;
  }
  case OMPRTL__tgt_unregister_lib: {
    // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
    // NOTE(review): declaration built with i32 return despite the 'void'
    // comment — confirm against omptarget.h.
    QualType ParamTy =
        CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
    llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
    break;
  }
  case OMPRTL__tgt_target_data_begin: {
    // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
    // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
    break;
  }
  case OMPRTL__tgt_target_data_begin_nowait: {
    // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
    // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
    break;
  }
  case OMPRTL__tgt_target_data_end: {
    // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
    // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
    break;
  }
  case OMPRTL__tgt_target_data_end_nowait: {
    // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
    // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
    break;
  }
  case OMPRTL__tgt_target_data_update: {
    // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
    // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
    break;
  }
  case OMPRTL__tgt_target_data_update_nowait: {
    // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
    // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
    break;
  }
  }
  // No default case above: an unknown enumerator leaves RTLFn null and trips
  // this assert.
  assert(RTLFn && "Unable to find OpenMP runtime function");
  return RTLFn;
}

/// Returns the __kmpc_for_static_init_{4,4u,8,8u} entry matching the loop's
/// induction-variable width (\p IVSize, 32 or 64) and signedness (\p IVSigned).
llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
                                                             bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                       : "__kmpc_for_static_init_4u")
                           : (IVSigned ? "__kmpc_for_static_init_8"
                                       : "__kmpc_for_static_init_8u");
  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    CGM.Int32Ty,                               // schedtype
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy,                                     // p_stride
    ITy,                                       // incr
    ITy                                        // chunk
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Returns the __kmpc_dispatch_init_{4,4u,8,8u} entry matching the loop's
/// induction-variable width and signedness.
llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name =
      IVSize == 32
          ? (IVSigned ?
"__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Returns the __kmpc_dispatch_fini_{4,4u,8,8u} entry matching the loop's
/// induction-variable width and signedness.
llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Returns the __kmpc_dispatch_next_{4,4u,8,8u} entry matching the loop's
/// induction-variable width and signedness.
llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy                                      // p_stride
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// For a 'declare target link' variable \p VD, returns the address of the
/// host/device link pointer "<mangled>_decl_tgt_link_ptr", creating and
/// registering it on first use. Returns an invalid Address for simd-only mode
/// or when \p VD is not a link-clause declare-target variable.
Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      isDeclareTargetDeclaration(VD);
  if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);
      // On the host the link pointer is externally visible and initialized
      // with the variable's host address; on the device it is resolved by the
      // offload runtime.
      if (!CGM.getLangOpts().OpenMPIsDevice) {
        auto *GV = cast<llvm::GlobalVariable>(Ptr);
        GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      }
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

/// Returns the per-variable "<mangled>.cache." internal global used by
/// __kmpc_threadprivate_cached. Only valid when TLS is not being used.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
                                     Twine(CGM.getMangledName(VD)) + ".cache.");
}

/// Returns the address of the current thread's copy of threadprivate variable
/// \p VD. With native TLS the original address is used directly; otherwise the
/// copy is obtained from __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  auto VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}

/// Registers \p Ctor / \p CopyCtor / \p Dtor for the threadprivate variable at
/// \p VDAddr with the OpenMP runtime.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  auto OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {OMPLoc,
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.VoidPtrTy),
                         Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}

// NOTE: this definition continues past the end of the visible chunk; the
// fragment below is reproduced as-is.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Native TLS needs no runtime registration.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit the ctor/cctor/dtor registration only once per variable definition.
  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
    ThreadPrivateWithDefinition.insert(VD);
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    auto Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_ctor_.", FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      auto ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg,
Init->getType().getQualifiers(), 2453 /*IsInitializer=*/true); 2454 ArgVal = CtorCGF.EmitLoadOfScalar( 2455 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2456 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2457 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 2458 CtorCGF.FinishFunction(); 2459 Ctor = Fn; 2460 } 2461 if (VD->getType().isDestructedType() != QualType::DK_none) { 2462 // Generate function that emits destructor call for the threadprivate copy 2463 // of the variable VD 2464 CodeGenFunction DtorCGF(CGM); 2465 FunctionArgList Args; 2466 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2467 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2468 ImplicitParamDecl::Other); 2469 Args.push_back(&Dst); 2470 2471 auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2472 CGM.getContext().VoidTy, Args); 2473 auto FTy = CGM.getTypes().GetFunctionType(FI); 2474 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 2475 FTy, ".__kmpc_global_dtor_.", FI, Loc); 2476 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2477 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 2478 Loc, Loc); 2479 // Create a scope with an artificial location for the body of this function. 2480 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2481 auto ArgVal = DtorCGF.EmitLoadOfScalar( 2482 DtorCGF.GetAddrOfLocalVar(&Dst), 2483 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 2484 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 2485 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2486 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2487 DtorCGF.FinishFunction(); 2488 Dtor = Fn; 2489 } 2490 // Do not emit init function if it is not required. 
2491 if (!Ctor && !Dtor) 2492 return nullptr; 2493 2494 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2495 auto CopyCtorTy = 2496 llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 2497 /*isVarArg=*/false)->getPointerTo(); 2498 // Copying constructor for the threadprivate variable. 2499 // Must be NULL - reserved by runtime, but currently it requires that this 2500 // parameter is always NULL. Otherwise it fires assertion. 2501 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 2502 if (Ctor == nullptr) { 2503 auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 2504 /*isVarArg=*/false)->getPointerTo(); 2505 Ctor = llvm::Constant::getNullValue(CtorTy); 2506 } 2507 if (Dtor == nullptr) { 2508 auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 2509 /*isVarArg=*/false)->getPointerTo(); 2510 Dtor = llvm::Constant::getNullValue(DtorTy); 2511 } 2512 if (!CGF) { 2513 auto InitFunctionTy = 2514 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 2515 auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( 2516 InitFunctionTy, ".__omp_threadprivate_init_.", 2517 CGM.getTypes().arrangeNullaryFunction()); 2518 CodeGenFunction InitCGF(CGM); 2519 FunctionArgList ArgList; 2520 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 2521 CGM.getTypes().arrangeNullaryFunction(), ArgList, 2522 Loc, Loc); 2523 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2524 InitCGF.FinishFunction(); 2525 return InitFunction; 2526 } 2527 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2528 } 2529 return nullptr; 2530 } 2531 2532 /// \brief Obtain information that uniquely identifies a target entry. This 2533 /// consists of the file and device IDs as well as line number associated with 2534 /// the relevant entry source location. 
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {

  auto &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");
  assert(Loc.isFileID() && "Source location is expected to refer to a file.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    llvm_unreachable("Source file with target region no longer exists!");

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

/// \brief Emit (once per variable) the initialization/finalization entries for
/// a declare-target variable \a Addr backing \a VD: on the device, actual
/// _ctor/_dtor functions that run the initializer/destructor; on the host,
/// placeholder globals. Both are registered as offload target-region entries.
/// Returns true iff compiling for the device (i.e. the default host
/// definition handling should be suppressed).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link)
    return false;
  VD = VD->getDefinition(CGM.getContext());
  if (VD && !DeclareTargetWithDefinition.insert(VD).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();

  SourceLocation Loc = VD->getCanonicalDecl()->getLocStart();
  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      // Host side: only a uniquely-named placeholder is needed so the entry
      // tables on host and device stay in sync.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      // Host-side placeholder, mirroring the constructor case above.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

/// \brief Emit the address of an "artificial" threadprivate variable that is
/// identified by \a Name rather than by a declaration, via
/// __kmpc_threadprivate_cached with a matching <Name>.artificial..cache.
/// cache variable.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  llvm::Twine VarName(Name, ".artificial.");
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*IsSigned=*/false),
      getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getPointerAlign());
}

/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
/// function. Here is the logic:
/// if (Cond) {
///   ThenGen();
/// } else {
///   ElseGen();
/// }
void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                      const RegionCodeGenTy &ThenGen,
                                      const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
  // emit the conditional branch.
  auto ThenBlock = CGF.createBasicBlock("omp_if.then");
  auto ElseBlock = CGF.createBasicBlock("omp_if.else");
  auto ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// \brief Emit a call of \a OutlinedFn as a parallel region: either
/// __kmpc_fork_call (parallel execution) or the
/// __kmpc_serialized_parallel/.../__kmpc_end_serialized_parallel sequence
/// (serial execution), selected at runtime by \a IfCond when present.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Value *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  auto *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    auto &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    auto &RT = CGF.CGM.getOpenMPRuntime();
    auto ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&gtid, &zero, CapturedStruct);
    auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddr =
        CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
                             /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond)
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();

  auto ThreadID = getThreadID(CGF, Loc);
  auto Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

/// \brief Get (or lazily create) a module-internal global variable with the
/// given type and name, zero-initialized with common linkage. Asserts that a
/// pre-existing variable of the same name has the requested type.
llvm::Constant *
CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
                                             const llvm::Twine &Name) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  auto RuntimeName = Out.str();
  auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
      CGM.getModule(), Ty, /*IsConstant*/ false,
      llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
      Elem.first());
}

/// \brief Get the lock variable (.gomp_critical_user_<name>.var) guarding a
/// named critical region.
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  llvm::Twine Name(".gomp_critical_user_", CriticalName);
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
/// Emits an "enter" runtime call before the region and an "exit" call after
/// it; in conditional mode the region body runs only when the enter call
/// returns nonzero, and Done() closes the conditional block.
class CommonActionTy final : public PrePostActionTy {
  llvm::Value *EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::Value *ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
                 llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
                 bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

/// \brief Emit a 'critical' region guarded by the named lock, optionally
/// passing a 'hint' value to __kmpc_critical_with_hint.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
  }
  CommonActionTy Action(
      createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
                                 : OMPRTL__kmpc_critical),
      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// \brief Emit a 'master' region: body runs only when __kmpc_master returns
/// nonzero, followed by __kmpc_end_master.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

/// \brief Emit a 'taskyield' as a call to __kmpc_omp_taskyield, plus the
/// untied-task switch point when inside an OpenMP region.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// \brief Emit a 'taskgroup' region bracketed by __kmpc_taskgroup and
/// __kmpc_end_taskgroup.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr =
      CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// \brief Build the .omp.copyprivate.copy_func helper passed to
/// __kmpc_copyprivate: void copy_func(void *LHSArg, void *RHSArg) that copies
/// each copyprivate variable from the source array to the destination array
/// using the provided per-variable assignment expressions.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  auto &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *Fn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
      ".omp.copyprivate.copy_func", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// \brief Emit a 'single' region: the body runs only on the thread for which
/// __kmpc_single returns nonzero; if 'copyprivate' variables are present, a
/// did_it flag and a __kmpc_copyprivate call broadcast the single thread's
/// values to the rest of the team.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  auto &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    auto CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(
          CopyprivateList, I, CGF.getPointerSize());
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    auto *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

/// \brief Emit an 'ordered' region; when 'threads' is in effect the body is
/// bracketed by __kmpc_ordered/__kmpc_end_ordered, otherwise it is emitted
/// inline with no runtime calls.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
                          createRuntimeFunction(OMPRTL__kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// \brief Emit a barrier: __kmpc_cancel_barrier inside a cancellable region
/// (with, when \a EmitChecks is set, a branch to the cancellation exit if it
/// returns nonzero), otherwise plain __kmpc_barrier. The location flags encode
/// which construct the barrier belongs to.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      auto *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
        auto *ContBB = CGF.createBasicBlock(".cancel.continue");
        auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //  exit from construct;
        auto CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}

/// \brief Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// \brief Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

/// \brief Return true iff the loop schedule maps to plain (non-chunked)
/// static scheduling.
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

/// \brief Return true iff the dist_schedule maps to plain (non-chunked)
/// static scheduling.
bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

/// \brief Return true iff the (unordered, non-chunked) schedule requires the
/// dynamic-dispatch runtime entries rather than the static ones.
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  auto Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// \brief Fold the monotonic/nonmonotonic/simd schedule modifiers into the
/// runtime schedule value. The 'simd' modifier upgrades static_chunked to
/// static_balanced_chunked; the others OR in a modifier bit.
static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  return Schedule | Modifier;
}

void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ?
DispatchValues.Chunk 3269 : CGF.Builder.getIntN(IVSize, 1); 3270 llvm::Value *Args[] = { 3271 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3272 CGF.Builder.getInt32(addMonoNonMonoModifier( 3273 Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 3274 DispatchValues.LB, // Lower 3275 DispatchValues.UB, // Upper 3276 CGF.Builder.getIntN(IVSize, 1), // Stride 3277 Chunk // Chunk 3278 }; 3279 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 3280 } 3281 3282 static void emitForStaticInitCall( 3283 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 3284 llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule, 3285 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 3286 const CGOpenMPRuntime::StaticRTInput &Values) { 3287 if (!CGF.HaveInsertPoint()) 3288 return; 3289 3290 assert(!Values.Ordered); 3291 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 3292 Schedule == OMP_sch_static_balanced_chunked || 3293 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 3294 Schedule == OMP_dist_sch_static || 3295 Schedule == OMP_dist_sch_static_chunked); 3296 3297 // Call __kmpc_for_static_init( 3298 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 3299 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 3300 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 3301 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 3302 llvm::Value *Chunk = Values.Chunk; 3303 if (Chunk == nullptr) { 3304 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 3305 Schedule == OMP_dist_sch_static) && 3306 "expected static non-chunked schedule"); 3307 // If the Chunk was not specified in the clause - use default value 1. 
3308 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 3309 } else { 3310 assert((Schedule == OMP_sch_static_chunked || 3311 Schedule == OMP_sch_static_balanced_chunked || 3312 Schedule == OMP_ord_static_chunked || 3313 Schedule == OMP_dist_sch_static_chunked) && 3314 "expected static chunked schedule"); 3315 } 3316 llvm::Value *Args[] = { 3317 UpdateLocation, 3318 ThreadId, 3319 CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1, 3320 M2)), // Schedule type 3321 Values.IL.getPointer(), // &isLastIter 3322 Values.LB.getPointer(), // &LB 3323 Values.UB.getPointer(), // &UB 3324 Values.ST.getPointer(), // &Stride 3325 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 3326 Chunk // Chunk 3327 }; 3328 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 3329 } 3330 3331 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 3332 SourceLocation Loc, 3333 OpenMPDirectiveKind DKind, 3334 const OpenMPScheduleTy &ScheduleKind, 3335 const StaticRTInput &Values) { 3336 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 3337 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 3338 assert(isOpenMPWorksharingDirective(DKind) && 3339 "Expected loop-based or sections-based directive."); 3340 auto *UpdatedLocation = emitUpdateLocation(CGF, Loc, 3341 isOpenMPLoopDirective(DKind) 3342 ? 
OMP_IDENT_WORK_LOOP 3343 : OMP_IDENT_WORK_SECTIONS); 3344 auto *ThreadId = getThreadID(CGF, Loc); 3345 auto *StaticInitFunction = 3346 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3347 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3348 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 3349 } 3350 3351 void CGOpenMPRuntime::emitDistributeStaticInit( 3352 CodeGenFunction &CGF, SourceLocation Loc, 3353 OpenMPDistScheduleClauseKind SchedKind, 3354 const CGOpenMPRuntime::StaticRTInput &Values) { 3355 OpenMPSchedType ScheduleNum = 3356 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 3357 auto *UpdatedLocation = 3358 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 3359 auto *ThreadId = getThreadID(CGF, Loc); 3360 auto *StaticInitFunction = 3361 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3362 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3363 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 3364 OMPC_SCHEDULE_MODIFIER_unknown, Values); 3365 } 3366 3367 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 3368 SourceLocation Loc, 3369 OpenMPDirectiveKind DKind) { 3370 if (!CGF.HaveInsertPoint()) 3371 return; 3372 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 3373 llvm::Value *Args[] = { 3374 emitUpdateLocation(CGF, Loc, 3375 isOpenMPDistributeDirective(DKind) 3376 ? OMP_IDENT_WORK_DISTRIBUTE 3377 : isOpenMPLoopDirective(DKind) 3378 ? 
OMP_IDENT_WORK_LOOP 3379 : OMP_IDENT_WORK_SECTIONS), 3380 getThreadID(CGF, Loc)}; 3381 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 3382 Args); 3383 } 3384 3385 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 3386 SourceLocation Loc, 3387 unsigned IVSize, 3388 bool IVSigned) { 3389 if (!CGF.HaveInsertPoint()) 3390 return; 3391 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 3392 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3393 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 3394 } 3395 3396 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 3397 SourceLocation Loc, unsigned IVSize, 3398 bool IVSigned, Address IL, 3399 Address LB, Address UB, 3400 Address ST) { 3401 // Call __kmpc_dispatch_next( 3402 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 3403 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 3404 // kmp_int[32|64] *p_stride); 3405 llvm::Value *Args[] = { 3406 emitUpdateLocation(CGF, Loc), 3407 getThreadID(CGF, Loc), 3408 IL.getPointer(), // &isLastIter 3409 LB.getPointer(), // &Lower 3410 UB.getPointer(), // &Upper 3411 ST.getPointer() // &Stride 3412 }; 3413 llvm::Value *Call = 3414 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 3415 return CGF.EmitScalarConversion( 3416 Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true), 3417 CGF.getContext().BoolTy, Loc); 3418 } 3419 3420 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 3421 llvm::Value *NumThreads, 3422 SourceLocation Loc) { 3423 if (!CGF.HaveInsertPoint()) 3424 return; 3425 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 3426 llvm::Value *Args[] = { 3427 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3428 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 3429 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 3430 Args); 
3431 } 3432 3433 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 3434 OpenMPProcBindClauseKind ProcBind, 3435 SourceLocation Loc) { 3436 if (!CGF.HaveInsertPoint()) 3437 return; 3438 // Constants for proc bind value accepted by the runtime. 3439 enum ProcBindTy { 3440 ProcBindFalse = 0, 3441 ProcBindTrue, 3442 ProcBindMaster, 3443 ProcBindClose, 3444 ProcBindSpread, 3445 ProcBindIntel, 3446 ProcBindDefault 3447 } RuntimeProcBind; 3448 switch (ProcBind) { 3449 case OMPC_PROC_BIND_master: 3450 RuntimeProcBind = ProcBindMaster; 3451 break; 3452 case OMPC_PROC_BIND_close: 3453 RuntimeProcBind = ProcBindClose; 3454 break; 3455 case OMPC_PROC_BIND_spread: 3456 RuntimeProcBind = ProcBindSpread; 3457 break; 3458 case OMPC_PROC_BIND_unknown: 3459 llvm_unreachable("Unsupported proc_bind value."); 3460 } 3461 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 3462 llvm::Value *Args[] = { 3463 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3464 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 3465 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 3466 } 3467 3468 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 3469 SourceLocation Loc) { 3470 if (!CGF.HaveInsertPoint()) 3471 return; 3472 // Build call void __kmpc_flush(ident_t *loc) 3473 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 3474 emitUpdateLocation(CGF, Loc)); 3475 } 3476 3477 namespace { 3478 /// \brief Indexes of fields for type kmp_task_t. 3479 enum KmpTaskTFields { 3480 /// \brief List of shared variables. 3481 KmpTaskTShareds, 3482 /// \brief Task routine. 3483 KmpTaskTRoutine, 3484 /// \brief Partition id for the untied tasks. 3485 KmpTaskTPartId, 3486 /// Function with call of destructors for private variables. 3487 Data1, 3488 /// Task priority. 3489 Data2, 3490 /// (Taskloops only) Lower bound. 3491 KmpTaskTLowerBound, 3492 /// (Taskloops only) Upper bound. 
3493 KmpTaskTUpperBound, 3494 /// (Taskloops only) Stride. 3495 KmpTaskTStride, 3496 /// (Taskloops only) Is last iteration flag. 3497 KmpTaskTLastIter, 3498 /// (Taskloops only) Reduction data. 3499 KmpTaskTReductions, 3500 }; 3501 } // anonymous namespace 3502 3503 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 3504 return OffloadEntriesTargetRegion.empty() && 3505 OffloadEntriesDeviceGlobalVar.empty(); 3506 } 3507 3508 /// \brief Initialize target region entry. 3509 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3510 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3511 StringRef ParentName, unsigned LineNum, 3512 unsigned Order) { 3513 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3514 "only required for the device " 3515 "code generation."); 3516 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3517 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3518 OMPTargetRegionEntryTargetRegion); 3519 ++OffloadingEntriesNum; 3520 } 3521 3522 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3523 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3524 StringRef ParentName, unsigned LineNum, 3525 llvm::Constant *Addr, llvm::Constant *ID, 3526 OMPTargetRegionEntryKind Flags) { 3527 // If we are emitting code for a target, the entry is already initialized, 3528 // only has to be registered. 
3529 if (CGM.getLangOpts().OpenMPIsDevice) { 3530 assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 3531 "Entry must exist."); 3532 auto &Entry = 3533 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3534 assert(Entry.isValid() && "Entry not initialized!"); 3535 Entry.setAddress(Addr); 3536 Entry.setID(ID); 3537 Entry.setFlags(Flags); 3538 } else { 3539 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3540 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3541 ++OffloadingEntriesNum; 3542 } 3543 } 3544 3545 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3546 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3547 unsigned LineNum) const { 3548 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3549 if (PerDevice == OffloadEntriesTargetRegion.end()) 3550 return false; 3551 auto PerFile = PerDevice->second.find(FileID); 3552 if (PerFile == PerDevice->second.end()) 3553 return false; 3554 auto PerParentName = PerFile->second.find(ParentName); 3555 if (PerParentName == PerFile->second.end()) 3556 return false; 3557 auto PerLine = PerParentName->second.find(LineNum); 3558 if (PerLine == PerParentName->second.end()) 3559 return false; 3560 // Fail if this entry is already registered. 3561 if (PerLine->second.getAddress() || PerLine->second.getID()) 3562 return false; 3563 return true; 3564 } 3565 3566 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3567 const OffloadTargetRegionEntryInfoActTy &Action) { 3568 // Scan all target region entries and perform the provided action. 
3569 for (const auto &D : OffloadEntriesTargetRegion) 3570 for (const auto &F : D.second) 3571 for (const auto &P : F.second) 3572 for (const auto &L : P.second) 3573 Action(D.first, F.first, P.first(), L.first, L.second); 3574 } 3575 3576 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3577 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3578 OMPTargetGlobalVarEntryKind Flags, 3579 unsigned Order) { 3580 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3581 "only required for the device " 3582 "code generation."); 3583 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3584 ++OffloadingEntriesNum; 3585 } 3586 3587 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3588 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3589 CharUnits VarSize, 3590 OMPTargetGlobalVarEntryKind Flags, 3591 llvm::GlobalValue::LinkageTypes Linkage) { 3592 if (CGM.getLangOpts().OpenMPIsDevice) { 3593 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3594 assert(Entry.isValid() && Entry.getFlags() == Flags && 3595 "Entry not initialized!"); 3596 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3597 "Resetting with the new address."); 3598 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) 3599 return; 3600 Entry.setAddress(Addr); 3601 Entry.setVarSize(VarSize); 3602 Entry.setLinkage(Linkage); 3603 } else { 3604 if (hasDeviceGlobalVarEntryInfo(VarName)) 3605 return; 3606 OffloadEntriesDeviceGlobalVar.try_emplace( 3607 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3608 ++OffloadingEntriesNum; 3609 } 3610 } 3611 3612 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3613 actOnDeviceGlobalVarEntriesInfo( 3614 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3615 // Scan all target region entries and perform the provided action. 
3616 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3617 Action(E.getKey(), E.getValue()); 3618 } 3619 3620 llvm::Function * 3621 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { 3622 // If we don't have entries or if we are emitting code for the device, we 3623 // don't need to do anything. 3624 if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) 3625 return nullptr; 3626 3627 auto &M = CGM.getModule(); 3628 auto &C = CGM.getContext(); 3629 3630 // Get list of devices we care about 3631 auto &Devices = CGM.getLangOpts().OMPTargetTriples; 3632 3633 // We should be creating an offloading descriptor only if there are devices 3634 // specified. 3635 assert(!Devices.empty() && "No OpenMP offloading devices??"); 3636 3637 // Create the external variables that will point to the begin and end of the 3638 // host entries section. These will be defined by the linker. 3639 auto *OffloadEntryTy = 3640 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); 3641 llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable( 3642 M, OffloadEntryTy, /*isConstant=*/true, 3643 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 3644 ".omp_offloading.entries_begin"); 3645 llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable( 3646 M, OffloadEntryTy, /*isConstant=*/true, 3647 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 3648 ".omp_offloading.entries_end"); 3649 3650 // Create all device images 3651 auto *DeviceImageTy = cast<llvm::StructType>( 3652 CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); 3653 ConstantInitBuilder DeviceImagesBuilder(CGM); 3654 auto DeviceImagesEntries = DeviceImagesBuilder.beginArray(DeviceImageTy); 3655 3656 for (llvm::Triple Device : Devices) { 3657 StringRef T = Device.getTriple(); 3658 auto *ImgBegin = new llvm::GlobalVariable( 3659 M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, 3660 /*Initializer=*/nullptr, 3661 
Twine(".omp_offloading.img_start.") + Twine(T)); 3662 auto *ImgEnd = new llvm::GlobalVariable( 3663 M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, 3664 /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T)); 3665 3666 auto Dev = DeviceImagesEntries.beginStruct(DeviceImageTy); 3667 Dev.add(ImgBegin); 3668 Dev.add(ImgEnd); 3669 Dev.add(HostEntriesBegin); 3670 Dev.add(HostEntriesEnd); 3671 Dev.finishAndAddTo(DeviceImagesEntries); 3672 } 3673 3674 // Create device images global array. 3675 llvm::GlobalVariable *DeviceImages = 3676 DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images", 3677 CGM.getPointerAlign(), 3678 /*isConstant=*/true); 3679 DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3680 3681 // This is a Zero array to be used in the creation of the constant expressions 3682 llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), 3683 llvm::Constant::getNullValue(CGM.Int32Ty)}; 3684 3685 // Create the target region descriptor. 3686 auto *BinaryDescriptorTy = cast<llvm::StructType>( 3687 CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy())); 3688 ConstantInitBuilder DescBuilder(CGM); 3689 auto DescInit = DescBuilder.beginStruct(BinaryDescriptorTy); 3690 DescInit.addInt(CGM.Int32Ty, Devices.size()); 3691 DescInit.add(llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), 3692 DeviceImages, 3693 Index)); 3694 DescInit.add(HostEntriesBegin); 3695 DescInit.add(HostEntriesEnd); 3696 3697 auto *Desc = DescInit.finishAndCreateGlobal(".omp_offloading.descriptor", 3698 CGM.getPointerAlign(), 3699 /*isConstant=*/true); 3700 3701 // Emit code to register or unregister the descriptor at execution 3702 // startup or closing, respectively. 
3703 3704 llvm::Function *UnRegFn; 3705 { 3706 FunctionArgList Args; 3707 ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); 3708 Args.push_back(&DummyPtr); 3709 3710 CodeGenFunction CGF(CGM); 3711 // Disable debug info for global (de-)initializer because they are not part 3712 // of some particular construct. 3713 CGF.disableDebugInfo(); 3714 auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3715 auto FTy = CGM.getTypes().GetFunctionType(FI); 3716 UnRegFn = CGM.CreateGlobalInitOrDestructFunction( 3717 FTy, ".omp_offloading.descriptor_unreg", FI); 3718 CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args); 3719 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), 3720 Desc); 3721 CGF.FinishFunction(); 3722 } 3723 llvm::Function *RegFn; 3724 { 3725 CodeGenFunction CGF(CGM); 3726 // Disable debug info for global (de-)initializer because they are not part 3727 // of some particular construct. 3728 CGF.disableDebugInfo(); 3729 auto &FI = CGM.getTypes().arrangeNullaryFunction(); 3730 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 3731 RegFn = CGM.CreateGlobalInitOrDestructFunction( 3732 FTy, ".omp_offloading.descriptor_reg", FI); 3733 CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList()); 3734 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); 3735 // Create a variable to drive the registration and unregistration of the 3736 // descriptor, so we can reuse the logic that emits Ctors and Dtors. 3737 ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), 3738 SourceLocation(), nullptr, C.CharTy, 3739 ImplicitParamDecl::Other); 3740 CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); 3741 CGF.FinishFunction(); 3742 } 3743 if (CGM.supportsCOMDAT()) { 3744 // It is sufficient to call registration function only once, so create a 3745 // COMDAT group for registration/unregistration functions and associated 3746 // data. 
That would reduce startup time and code size. Registration 3747 // function serves as a COMDAT group key. 3748 auto ComdatKey = M.getOrInsertComdat(RegFn->getName()); 3749 RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); 3750 RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); 3751 RegFn->setComdat(ComdatKey); 3752 UnRegFn->setComdat(ComdatKey); 3753 DeviceImages->setComdat(ComdatKey); 3754 Desc->setComdat(ComdatKey); 3755 } 3756 return RegFn; 3757 } 3758 3759 void CGOpenMPRuntime::createOffloadEntry( 3760 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3761 llvm::GlobalValue::LinkageTypes Linkage) { 3762 StringRef Name = Addr->getName(); 3763 auto *TgtOffloadEntryType = cast<llvm::StructType>( 3764 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy())); 3765 llvm::LLVMContext &C = CGM.getModule().getContext(); 3766 llvm::Module &M = CGM.getModule(); 3767 3768 // Make sure the address has the right type. 3769 llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy); 3770 3771 // Create constant string with the name. 3772 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3773 3774 llvm::GlobalVariable *Str = 3775 new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true, 3776 llvm::GlobalValue::InternalLinkage, StrPtrInit, 3777 ".omp_offloading.entry_name"); 3778 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3779 llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy); 3780 3781 // We can't have any padding between symbols, so we need to have 1-byte 3782 // alignment. 3783 auto Align = CharUnits::fromQuantity(1); 3784 3785 // Create the entry struct. 
3786 ConstantInitBuilder EntryBuilder(CGM); 3787 auto EntryInit = EntryBuilder.beginStruct(TgtOffloadEntryType); 3788 EntryInit.add(AddrPtr); 3789 EntryInit.add(StrPtr); 3790 EntryInit.addInt(CGM.SizeTy, Size); 3791 EntryInit.addInt(CGM.Int32Ty, Flags); 3792 EntryInit.addInt(CGM.Int32Ty, 0); 3793 llvm::GlobalVariable *Entry = EntryInit.finishAndCreateGlobal( 3794 Twine(".omp_offloading.entry.", Name), Align, 3795 /*Constant=*/true, Linkage); 3796 3797 // The entry has to be created in the section the linker expects it to be. 3798 Entry->setSection(".omp_offloading.entries"); 3799 } 3800 3801 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3802 // Emit the offloading entries and metadata so that the device codegen side 3803 // can easily figure out what to emit. The produced metadata looks like 3804 // this: 3805 // 3806 // !omp_offload.info = !{!1, ...} 3807 // 3808 // Right now we only generate metadata for function that contain target 3809 // regions. 3810 3811 // If we do not have entries, we dont need to do anything. 3812 if (OffloadEntriesInfoManager.empty()) 3813 return; 3814 3815 llvm::Module &M = CGM.getModule(); 3816 llvm::LLVMContext &C = M.getContext(); 3817 SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> 3818 OrderedEntries(OffloadEntriesInfoManager.size()); 3819 3820 // Auxiliary methods to create metadata values and strings. 3821 auto &&GetMDInt = [&C](unsigned V) { 3822 return llvm::ConstantAsMetadata::get( 3823 llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), V)); 3824 }; 3825 3826 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3827 3828 // Create the offloading info metadata node. 
3829 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3830 3831 // Create function that emits metadata for each target region entry; 3832 auto &&TargetRegionMetadataEmitter = 3833 [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString]( 3834 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3835 unsigned Line, 3836 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3837 // Generate metadata for target regions. Each entry of this metadata 3838 // contains: 3839 // - Entry 0 -> Kind of this type of metadata (0). 3840 // - Entry 1 -> Device ID of the file where the entry was identified. 3841 // - Entry 2 -> File ID of the file where the entry was identified. 3842 // - Entry 3 -> Mangled name of the function where the entry was 3843 // identified. 3844 // - Entry 4 -> Line in the file where the entry was identified. 3845 // - Entry 5 -> Order the entry was created. 3846 // The first element of the metadata node is the kind. 3847 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3848 GetMDInt(FileID), GetMDString(ParentName), 3849 GetMDInt(Line), GetMDInt(E.getOrder())}; 3850 3851 // Save this entry in the right position of the ordered entries array. 3852 OrderedEntries[E.getOrder()] = &E; 3853 3854 // Add metadata to the named metadata node. 3855 MD->addOperand(llvm::MDNode::get(C, Ops)); 3856 }; 3857 3858 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3859 TargetRegionMetadataEmitter); 3860 3861 // Create function that emits metadata for each device global variable entry; 3862 auto &&DeviceGlobalVarMetadataEmitter = 3863 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3864 MD](StringRef MangledName, 3865 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3866 &E) { 3867 // Generate metadata for global variables. Each entry of this metadata 3868 // contains: 3869 // - Entry 0 -> Kind of this type of metadata (1). 3870 // - Entry 1 -> Mangled name of the variable. 
3871 // - Entry 2 -> Declare target kind. 3872 // - Entry 3 -> Order the entry was created. 3873 // The first element of the metadata node is the kind. 3874 llvm::Metadata *Ops[] = { 3875 GetMDInt(E.getKind()), GetMDString(MangledName), 3876 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3877 3878 // Save this entry in the right position of the ordered entries array. 3879 OrderedEntries[E.getOrder()] = &E; 3880 3881 // Add metadata to the named metadata node. 3882 MD->addOperand(llvm::MDNode::get(C, Ops)); 3883 }; 3884 3885 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3886 DeviceGlobalVarMetadataEmitter); 3887 3888 for (const auto *E : OrderedEntries) { 3889 assert(E && "All ordered entries must exist!"); 3890 if (const auto *CE = 3891 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3892 E)) { 3893 assert(CE->getID() && CE->getAddress() && 3894 "Entry ID and Addr are invalid!"); 3895 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3896 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3897 } else if (const auto *CE = 3898 dyn_cast<OffloadEntriesInfoManagerTy:: 3899 OffloadEntryInfoDeviceGlobalVar>(E)) { 3900 assert(CE->getAddress() && "Entry Addr is invalid!"); 3901 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3902 CE->getVarSize().getQuantity(), CE->getFlags(), 3903 CE->getLinkage()); 3904 } else { 3905 llvm_unreachable("Unsupported entry kind."); 3906 } 3907 } 3908 } 3909 3910 /// \brief Loads all the offload entries information from the host IR 3911 /// metadata. 3912 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3913 // If we are in target mode, load the metadata from the host IR. This code has 3914 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
3915 3916 if (!CGM.getLangOpts().OpenMPIsDevice) 3917 return; 3918 3919 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3920 return; 3921 3922 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3923 if (Buf.getError()) 3924 return; 3925 3926 llvm::LLVMContext C; 3927 auto ME = expectedToErrorOrAndEmitErrors( 3928 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3929 3930 if (ME.getError()) 3931 return; 3932 3933 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3934 if (!MD) 3935 return; 3936 3937 for (llvm::MDNode *MN : MD->operands()) { 3938 auto GetMDInt = [MN](unsigned Idx) { 3939 llvm::ConstantAsMetadata *V = 3940 cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3941 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3942 }; 3943 3944 auto GetMDString = [MN](unsigned Idx) { 3945 llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3946 return V->getString(); 3947 }; 3948 3949 switch (GetMDInt(0)) { 3950 default: 3951 llvm_unreachable("Unexpected metadata!"); 3952 break; 3953 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3954 OffloadingEntryInfoTargetRegion: 3955 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3956 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3957 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3958 /*Order=*/GetMDInt(5)); 3959 break; 3960 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3961 OffloadingEntryInfoDeviceGlobalVar: 3962 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3963 /*MangledName=*/GetMDString(1), 3964 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3965 /*Flags=*/GetMDInt(2)), 3966 /*Order=*/GetMDInt(3)); 3967 break; 3968 } 3969 } 3970 } 3971 3972 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3973 if (!KmpRoutineEntryPtrTy) { 3974 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 
    auto &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

/// Append an unnamed public field of type \p FieldTy to \p DC and return it.
/// The field is not a bit-field, not mutable, and has no in-class initializer.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void    *addr;     // Pointer to the offload entry info.
  //                      // (function or global)
  //   char    *name;     // Name of the function or global.
  //   size_t   size;     // Size of the entry info (0 if it a function).
  //   int32_t  flags;    // Flags associated with the entry, e.g. 'link'.
  //   int32_t  reserved; // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    // NOTE(review): the record is marked packed, presumably to match the
    // layout the offloading runtime expects — confirm against omptarget.h.
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
  // These are the types we need to build:
  // struct __tgt_device_image{
  //   void   *ImageStart;       // Pointer to the target code start.
  //   void   *ImageEnd;         // Pointer to the target code end.
  //   // We also add the host entries to the device image, as it may be useful
  //   // for the target runtime to have access to that information.
  //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
  //                                         // the entries.
  //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
  //                                         // entries (non inclusive).
  // };
  if (TgtDeviceImageQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    auto *RD = C.buildImplicitRecord("__tgt_device_image");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtDeviceImageQTy = C.getRecordType(RD);
  }
  return TgtDeviceImageQTy;
}

/// Lazily builds the QualType for the __tgt_bin_desc descriptor that ties
/// together the device images and the host offload entry table.
QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
  // struct __tgt_bin_desc{
  //   int32_t              NumDevices;      // Number of devices supported.
  //   __tgt_device_image   *DeviceImages;   // Arrays of device images
  //                                         // (one per device).
  //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
  //                                         // entries.
  //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
  //                                         // entries (non inclusive).
  // };
  if (TgtBinaryDescriptorQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
    RD->startDefinition();
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtBinaryDescriptorQTy = C.getRecordType(RD);
  }
  return TgtBinaryDescriptorQTy;
}

namespace {
/// Ties together a captured variable, its task-private copy and, for
/// firstprivates, the helper variable used to initialize single elements.
struct PrivateHelpersTy {
  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
                   const VarDecl *PrivateElemInit)
      : Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  // Variable as referenced in the task region.
  const VarDecl *Original;
  // The task-local copy of the variable.
  const VarDecl *PrivateCopy;
  // Element-init helper; null for plain private/lastprivate entries.
  const VarDecl *PrivateElemInit;
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

/// Build the implicit record holding one field per task-private variable.
/// Returns null when there are no privates.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    auto &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //  /* private vars */
    // };
    auto *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (auto &&Pair : Privates) {
      auto *VD = Pair.second.Original;
      auto Type = VD->getType();
      // Reference captures are stored by value in the privates record.
      Type = Type.getNonReferenceType();
      auto *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        // Propagate alignment attributes so the field keeps any
        // over-alignment requested on the original variable.
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

/// Build the implicit kmp_task_t record (with the extra taskloop fields when
/// \p Kind is a taskloop directive). The field order is the runtime ABI.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  auto &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t     data1;
  //         kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  auto *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

/// Build the record wrapping kmp_task_t together with the privates record;
/// the privates field is omitted entirely when there are no privates.
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  auto &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  }
  RD->completeDefinition();
  return RD;
}

/// \brief Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
/// For taskloops:
/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
/// tt->reductions, tt->shareds);
/// return 0;
/// }
/// \endcode
static llvm::Value *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Value *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  auto &C = CGM.getContext();
  // Declare the two implicit parameters: the global thread id and the
  // restrict-qualified pointer to the task descriptor.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  auto *TaskEntry =
      llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_entry.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  auto *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Base is the embedded kmp_task_t (first field of the wrapper record).
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  // part_id is passed by address, not by value.
  auto *PartidParam = PartIdLVal.getPointer();

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field may be absent when the task has no privates; pass a
  // null pointer in that case.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally receive lb, ub, st, liter and reductions loaded
    // from the kmp_task_t record.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    auto LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    auto *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    auto UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    auto *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    auto StLVal = CGF.EmitLValueForField(Base, *StFI);
    auto *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
    auto *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    auto RLVal = CGF.EmitLValueForField(Base, *RFI);
    auto *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The runtime expects the entry to return 0.
  CGF.EmitStoreThroughLValue(
      RValue::get(CGF.Builder.getInt32(/*C=*/0)),
      CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emit the .omp_task_destructor. helper that runs the destructors of all
/// destructible fields of the task privates record.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
  auto *DestructorFn =
llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 4312 ".omp_task_destructor.", &CGM.getModule()); 4313 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 4314 DestructorFnInfo); 4315 CodeGenFunction CGF(CGM); 4316 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 4317 Args, Loc, Loc); 4318 4319 LValue Base = CGF.EmitLoadOfPointerLValue( 4320 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4321 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4322 auto *KmpTaskTWithPrivatesQTyRD = 4323 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4324 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4325 Base = CGF.EmitLValueForField(Base, *FI); 4326 for (auto *Field : 4327 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 4328 if (auto DtorKind = Field->getType().isDestructedType()) { 4329 auto FieldLValue = CGF.EmitLValueForField(Base, Field); 4330 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 4331 } 4332 } 4333 CGF.FinishFunction(); 4334 return DestructorFn; 4335 } 4336 4337 /// \brief Emit a privates mapping function for correct handling of private and 4338 /// firstprivate variables. 4339 /// \code 4340 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 4341 /// **noalias priv1,..., <tyn> **noalias privn) { 4342 /// *priv1 = &.privates.priv1; 4343 /// ...; 4344 /// *privn = &.privates.privn; 4345 /// } 4346 /// \endcode 4347 static llvm::Value * 4348 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 4349 ArrayRef<const Expr *> PrivateVars, 4350 ArrayRef<const Expr *> FirstprivateVars, 4351 ArrayRef<const Expr *> LastprivateVars, 4352 QualType PrivatesQTy, 4353 ArrayRef<PrivateDataTy> Privates) { 4354 auto &C = CGM.getContext(); 4355 FunctionArgList Args; 4356 ImplicitParamDecl TaskPrivatesArg( 4357 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4358 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 4359 ImplicitParamDecl::Other); 4360 Args.push_back(&TaskPrivatesArg); 4361 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 4362 unsigned Counter = 1; 4363 for (auto *E: PrivateVars) { 4364 Args.push_back(ImplicitParamDecl::Create( 4365 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4366 C.getPointerType(C.getPointerType(E->getType())) 4367 .withConst() 4368 .withRestrict(), 4369 ImplicitParamDecl::Other)); 4370 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4371 PrivateVarsPos[VD] = Counter; 4372 ++Counter; 4373 } 4374 for (auto *E : FirstprivateVars) { 4375 Args.push_back(ImplicitParamDecl::Create( 4376 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4377 C.getPointerType(C.getPointerType(E->getType())) 4378 .withConst() 4379 .withRestrict(), 4380 ImplicitParamDecl::Other)); 4381 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4382 PrivateVarsPos[VD] = Counter; 4383 ++Counter; 4384 } 4385 for (auto *E: LastprivateVars) { 4386 Args.push_back(ImplicitParamDecl::Create( 4387 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4388 C.getPointerType(C.getPointerType(E->getType())) 4389 .withConst() 4390 .withRestrict(), 4391 ImplicitParamDecl::Other)); 4392 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4393 PrivateVarsPos[VD] = Counter; 4394 ++Counter; 4395 } 
4396 auto &TaskPrivatesMapFnInfo = 4397 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4398 auto *TaskPrivatesMapTy = 4399 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 4400 auto *TaskPrivatesMap = llvm::Function::Create( 4401 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, 4402 ".omp_task_privates_map.", &CGM.getModule()); 4403 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 4404 TaskPrivatesMapFnInfo); 4405 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 4406 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 4407 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 4408 CodeGenFunction CGF(CGM); 4409 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 4410 TaskPrivatesMapFnInfo, Args, Loc, Loc); 4411 4412 // *privi = &.privates.privi; 4413 LValue Base = CGF.EmitLoadOfPointerLValue( 4414 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 4415 TaskPrivatesArg.getType()->castAs<PointerType>()); 4416 auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 4417 Counter = 0; 4418 for (auto *Field : PrivatesQTyRD->fields()) { 4419 auto FieldLVal = CGF.EmitLValueForField(Base, Field); 4420 auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4421 auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4422 auto RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4423 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 4424 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 4425 ++Counter; 4426 } 4427 CGF.FinishFunction(); 4428 return TaskPrivatesMap; 4429 } 4430 4431 static bool stable_sort_comparator(const PrivateDataTy P1, 4432 const PrivateDataTy P2) { 4433 return P1.first > P2.first; 4434 } 4435 4436 /// Emit initialization for private variables in task-based directives. 
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  auto &C = CGF.getContext();
  // PrivatesBase is the privates record (second field of the wrapper).
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Iterate the privates record fields in lockstep with Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (auto &&Pair : Privates) {
    auto *VD = Pair.second.PrivateCopy;
    auto *Init = VD->getAnyInitializer();
    // In the task-dup helper (ForDup) only non-trivial constructor inits are
    // re-emitted; trivial inits were already handled by the initial task.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (auto *Elem = Pair.second.PrivateElemInit) {
        auto *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = OriginalVD->getType();
        auto *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          // Re-derive the lvalue with the original declaration's alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: bind the init helper to the shared
          // value, then emit the initializer expression.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
/// Returns true if any private copy has a non-trivial constructor initializer.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (auto &&Pair : Privates) {
    auto *VD = Pair.second.PrivateCopy;
    auto *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
  }
  return InitRequired;
}


/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
///   // setup lastprivate flag
///   task_dst->last = lastpriv;
///   // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  auto &C = CGM.getContext();
  // Parameters: destination task, source task, lastprivate flag.
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  auto *TaskDup =
      llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_dup.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  // The dup helper is only generated when there are privates to initialize.
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivates are copied from the *source* task's shareds.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
  bool NeedsCleanup = false;
  // Scan the privates record (second field of the wrapper) for any field
  // needing destruction; stop at the first match.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
  for (auto *FD : PrivateRD->fields()) {
    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
    if (NeedsCleanup)
      break;
  }
  return NeedsCleanup;
}

CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Value *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  auto &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
4655 auto I = Data.PrivateCopies.begin(); 4656 for (auto *E : Data.PrivateVars) { 4657 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4658 Privates.push_back(std::make_pair( 4659 C.getDeclAlign(VD), 4660 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4661 /*PrivateElemInit=*/nullptr))); 4662 ++I; 4663 } 4664 I = Data.FirstprivateCopies.begin(); 4665 auto IElemInitRef = Data.FirstprivateInits.begin(); 4666 for (auto *E : Data.FirstprivateVars) { 4667 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4668 Privates.push_back(std::make_pair( 4669 C.getDeclAlign(VD), 4670 PrivateHelpersTy( 4671 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4672 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())))); 4673 ++I; 4674 ++IElemInitRef; 4675 } 4676 I = Data.LastprivateCopies.begin(); 4677 for (auto *E : Data.LastprivateVars) { 4678 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4679 Privates.push_back(std::make_pair( 4680 C.getDeclAlign(VD), 4681 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4682 /*PrivateElemInit=*/nullptr))); 4683 ++I; 4684 } 4685 std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator); 4686 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4687 // Build type kmp_routine_entry_t (if not built yet). 4688 emitKmpRoutineEntryT(KmpInt32Ty); 4689 // Build type kmp_task_t (if not built yet). 
4690 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4691 if (SavedKmpTaskloopTQTy.isNull()) { 4692 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4693 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4694 } 4695 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4696 } else { 4697 assert((D.getDirectiveKind() == OMPD_task || 4698 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4699 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4700 "Expected taskloop, task or target directive"); 4701 if (SavedKmpTaskTQTy.isNull()) { 4702 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4703 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4704 } 4705 KmpTaskTQTy = SavedKmpTaskTQTy; 4706 } 4707 auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4708 // Build particular struct kmp_task_t for the given task. 4709 auto *KmpTaskTWithPrivatesQTyRD = 4710 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4711 auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4712 QualType KmpTaskTWithPrivatesPtrQTy = 4713 C.getPointerType(KmpTaskTWithPrivatesQTy); 4714 auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4715 auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo(); 4716 auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4717 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4718 4719 // Emit initial values for private copies (if any). 
4720 llvm::Value *TaskPrivatesMap = nullptr; 4721 auto *TaskPrivatesMapTy = 4722 std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType(); 4723 if (!Privates.empty()) { 4724 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4725 TaskPrivatesMap = emitTaskPrivateMappingFunction( 4726 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 4727 FI->getType(), Privates); 4728 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4729 TaskPrivatesMap, TaskPrivatesMapTy); 4730 } else { 4731 TaskPrivatesMap = llvm::ConstantPointerNull::get( 4732 cast<llvm::PointerType>(TaskPrivatesMapTy)); 4733 } 4734 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 4735 // kmp_task_t *tt); 4736 auto *TaskEntry = emitProxyTaskFunction( 4737 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4738 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 4739 TaskPrivatesMap); 4740 4741 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 4742 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 4743 // kmp_routine_entry_t *task_entry); 4744 // Task flags. Format is taken from 4745 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h, 4746 // description of kmp_tasking_flags struct. 4747 enum { 4748 TiedFlag = 0x1, 4749 FinalFlag = 0x2, 4750 DestructorsFlag = 0x8, 4751 PriorityFlag = 0x20 4752 }; 4753 unsigned Flags = Data.Tied ? TiedFlag : 0; 4754 bool NeedsCleanup = false; 4755 if (!Privates.empty()) { 4756 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 4757 if (NeedsCleanup) 4758 Flags = Flags | DestructorsFlag; 4759 } 4760 if (Data.Priority.getInt()) 4761 Flags = Flags | PriorityFlag; 4762 auto *TaskFlags = 4763 Data.Final.getPointer() 4764 ? 
CGF.Builder.CreateSelect(Data.Final.getPointer(), 4765 CGF.Builder.getInt32(FinalFlag), 4766 CGF.Builder.getInt32(/*C=*/0)) 4767 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 4768 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 4769 auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 4770 llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), 4771 getThreadID(CGF, Loc), TaskFlags, 4772 KmpTaskTWithPrivatesTySize, SharedsSize, 4773 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4774 TaskEntry, KmpRoutineEntryPtrTy)}; 4775 auto *NewTask = CGF.EmitRuntimeCall( 4776 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 4777 auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4778 NewTask, KmpTaskTWithPrivatesPtrTy); 4779 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 4780 KmpTaskTWithPrivatesQTy); 4781 LValue TDBase = 4782 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4783 // Fill the data in the resulting kmp_task_t record. 4784 // Copy shareds if there are any. 4785 Address KmpTaskSharedsPtr = Address::invalid(); 4786 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 4787 KmpTaskSharedsPtr = 4788 Address(CGF.EmitLoadOfScalar( 4789 CGF.EmitLValueForField( 4790 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 4791 KmpTaskTShareds)), 4792 Loc), 4793 CGF.getNaturalTypeAlignment(SharedsTy)); 4794 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 4795 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 4796 CGF.EmitAggregateCopy(Dest, Src, SharedsTy); 4797 } 4798 // Emit initial values for private copies (if any). 
4799 TaskResultTy Result; 4800 if (!Privates.empty()) { 4801 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 4802 SharedsTy, SharedsPtrTy, Data, Privates, 4803 /*ForDup=*/false); 4804 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 4805 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 4806 Result.TaskDupFn = emitTaskDupFunction( 4807 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 4808 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 4809 /*WithLastIter=*/!Data.LastprivateVars.empty()); 4810 } 4811 } 4812 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 4813 enum { Priority = 0, Destructors = 1 }; 4814 // Provide pointer to function with destructors for privates. 4815 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 4816 auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl(); 4817 if (NeedsCleanup) { 4818 llvm::Value *DestructorFn = emitDestructorsFunction( 4819 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4820 KmpTaskTWithPrivatesQTy); 4821 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 4822 LValue DestructorsLV = CGF.EmitLValueForField( 4823 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 4824 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4825 DestructorFn, KmpRoutineEntryPtrTy), 4826 DestructorsLV); 4827 } 4828 // Set priority. 
4829 if (Data.Priority.getInt()) { 4830 LValue Data2LV = CGF.EmitLValueForField( 4831 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 4832 LValue PriorityLV = CGF.EmitLValueForField( 4833 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 4834 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 4835 } 4836 Result.NewTask = NewTask; 4837 Result.TaskEntry = TaskEntry; 4838 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 4839 Result.TDBase = TDBase; 4840 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 4841 return Result; 4842 } 4843 4844 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 4845 const OMPExecutableDirective &D, 4846 llvm::Value *TaskFunction, 4847 QualType SharedsTy, Address Shareds, 4848 const Expr *IfCond, 4849 const OMPTaskDataTy &Data) { 4850 if (!CGF.HaveInsertPoint()) 4851 return; 4852 4853 TaskResultTy Result = 4854 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 4855 llvm::Value *NewTask = Result.NewTask; 4856 llvm::Value *TaskEntry = Result.TaskEntry; 4857 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 4858 LValue TDBase = Result.TDBase; 4859 RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 4860 auto &C = CGM.getContext(); 4861 // Process list of dependences. 4862 Address DependenciesArray = Address::invalid(); 4863 unsigned NumDependencies = Data.Dependences.size(); 4864 if (NumDependencies) { 4865 // Dependence kind for RTL. 
4866 enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 }; 4867 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 4868 RecordDecl *KmpDependInfoRD; 4869 QualType FlagsTy = 4870 C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4871 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4872 if (KmpDependInfoTy.isNull()) { 4873 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4874 KmpDependInfoRD->startDefinition(); 4875 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4876 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4877 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4878 KmpDependInfoRD->completeDefinition(); 4879 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4880 } else 4881 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4882 CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy); 4883 // Define type kmp_depend_info[<Dependences.size()>]; 4884 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 4885 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), 4886 ArrayType::Normal, /*IndexTypeQuals=*/0); 4887 // kmp_depend_info[<Dependences.size()>] deps; 4888 DependenciesArray = 4889 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4890 for (unsigned i = 0; i < NumDependencies; ++i) { 4891 const Expr *E = Data.Dependences[i].second; 4892 auto Addr = CGF.EmitLValue(E); 4893 llvm::Value *Size; 4894 QualType Ty = E->getType(); 4895 if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 4896 LValue UpAddrLVal = 4897 CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false); 4898 llvm::Value *UpAddr = 4899 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); 4900 llvm::Value *LowIntPtr = 4901 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); 4902 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 4903 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 4904 } else 4905 Size = 
CGF.getTypeSize(Ty); 4906 auto Base = CGF.MakeAddrLValue( 4907 CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize), 4908 KmpDependInfoTy); 4909 // deps[i].base_addr = &<Dependences[i].second>; 4910 auto BaseAddrLVal = CGF.EmitLValueForField( 4911 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4912 CGF.EmitStoreOfScalar( 4913 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), 4914 BaseAddrLVal); 4915 // deps[i].len = sizeof(<Dependences[i].second>); 4916 auto LenLVal = CGF.EmitLValueForField( 4917 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 4918 CGF.EmitStoreOfScalar(Size, LenLVal); 4919 // deps[i].flags = <Dependences[i].first>; 4920 RTLDependenceKindTy DepKind; 4921 switch (Data.Dependences[i].first) { 4922 case OMPC_DEPEND_in: 4923 DepKind = DepIn; 4924 break; 4925 // Out and InOut dependencies must use the same code. 4926 case OMPC_DEPEND_out: 4927 case OMPC_DEPEND_inout: 4928 DepKind = DepInOut; 4929 break; 4930 case OMPC_DEPEND_source: 4931 case OMPC_DEPEND_sink: 4932 case OMPC_DEPEND_unknown: 4933 llvm_unreachable("Unknown task dependence type"); 4934 } 4935 auto FlagsLVal = CGF.EmitLValueForField( 4936 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 4937 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 4938 FlagsLVal); 4939 } 4940 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4941 CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()), 4942 CGF.VoidPtrTy); 4943 } 4944 4945 // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() 4946 // libcall. 
4947 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 4948 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 4949 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 4950 // list is not empty 4951 auto *ThreadID = getThreadID(CGF, Loc); 4952 auto *UpLoc = emitUpdateLocation(CGF, Loc); 4953 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 4954 llvm::Value *DepTaskArgs[7]; 4955 if (NumDependencies) { 4956 DepTaskArgs[0] = UpLoc; 4957 DepTaskArgs[1] = ThreadID; 4958 DepTaskArgs[2] = NewTask; 4959 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); 4960 DepTaskArgs[4] = DependenciesArray.getPointer(); 4961 DepTaskArgs[5] = CGF.Builder.getInt32(0); 4962 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4963 } 4964 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies, 4965 &TaskArgs, 4966 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 4967 if (!Data.Tied) { 4968 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 4969 auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 4970 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 4971 } 4972 if (NumDependencies) { 4973 CGF.EmitRuntimeCall( 4974 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); 4975 } else { 4976 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 4977 TaskArgs); 4978 } 4979 // Check if parent region is untied and build return for untied task; 4980 if (auto *Region = 4981 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 4982 Region->emitUntiedSwitch(CGF); 4983 }; 4984 4985 llvm::Value *DepWaitTaskArgs[6]; 4986 if (NumDependencies) { 4987 DepWaitTaskArgs[0] = UpLoc; 4988 DepWaitTaskArgs[1] = ThreadID; 4989 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); 4990 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 4991 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 4992 DepWaitTaskArgs[5] = 
llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4993 } 4994 auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 4995 NumDependencies, &DepWaitTaskArgs, 4996 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 4997 auto &RT = CGF.CGM.getOpenMPRuntime(); 4998 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 4999 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5000 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5001 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5002 // is specified. 5003 if (NumDependencies) 5004 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), 5005 DepWaitTaskArgs); 5006 // Call proxy_task_entry(gtid, new_task); 5007 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5008 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5009 Action.Enter(CGF); 5010 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5011 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5012 OutlinedFnArgs); 5013 }; 5014 5015 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5016 // kmp_task_t *new_task); 5017 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5018 // kmp_task_t *new_task); 5019 RegionCodeGenTy RCG(CodeGen); 5020 CommonActionTy Action( 5021 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, 5022 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); 5023 RCG.setAction(Action); 5024 RCG(CGF); 5025 }; 5026 5027 if (IfCond) 5028 emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5029 else { 5030 RegionCodeGenTy ThenRCG(ThenCodeGen); 5031 ThenRCG(CGF); 5032 } 5033 } 5034 5035 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5036 const OMPLoopDirective &D, 5037 llvm::Value *TaskFunction, 5038 QualType SharedsTy, Address Shareds, 5039 const Expr *IfCond, 5040 const OMPTaskDataTy &Data) { 5041 if (!CGF.HaveInsertPoint()) 5042 
return; 5043 TaskResultTy Result = 5044 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5045 // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() 5046 // libcall. 5047 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5048 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5049 // sched, kmp_uint64 grainsize, void *task_dup); 5050 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5051 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5052 llvm::Value *IfVal; 5053 if (IfCond) { 5054 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5055 /*isSigned=*/true); 5056 } else 5057 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5058 5059 LValue LBLVal = CGF.EmitLValueForField( 5060 Result.TDBase, 5061 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5062 auto *LBVar = 5063 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5064 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), 5065 /*IsInitializer=*/true); 5066 LValue UBLVal = CGF.EmitLValueForField( 5067 Result.TDBase, 5068 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5069 auto *UBVar = 5070 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5071 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), 5072 /*IsInitializer=*/true); 5073 LValue StLVal = CGF.EmitLValueForField( 5074 Result.TDBase, 5075 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5076 auto *StVar = 5077 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5078 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), 5079 /*IsInitializer=*/true); 5080 // Store reductions address. 
5081 LValue RedLVal = CGF.EmitLValueForField( 5082 Result.TDBase, 5083 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5084 if (Data.Reductions) 5085 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5086 else { 5087 CGF.EmitNullInitialization(RedLVal.getAddress(), 5088 CGF.getContext().VoidPtrTy); 5089 } 5090 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5091 llvm::Value *TaskArgs[] = { 5092 UpLoc, 5093 ThreadID, 5094 Result.NewTask, 5095 IfVal, 5096 LBLVal.getPointer(), 5097 UBLVal.getPointer(), 5098 CGF.EmitLoadOfScalar(StLVal, Loc), 5099 llvm::ConstantInt::getNullValue( 5100 CGF.IntTy), // Always 0 because taskgroup emitted by the compiler 5101 llvm::ConstantInt::getSigned( 5102 CGF.IntTy, Data.Schedule.getPointer() 5103 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5104 : NoSchedule), 5105 Data.Schedule.getPointer() 5106 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5107 /*isSigned=*/false) 5108 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5109 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5110 Result.TaskDupFn, CGF.VoidPtrTy) 5111 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5112 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); 5113 } 5114 5115 /// \brief Emit reduction operation for each element of array (required for 5116 /// array sections) LHS op = RHS. 5117 /// \param Type Type of array. 5118 /// \param LHSVar Variable on the left side of the reduction operation 5119 /// (references element of array in original variable). 5120 /// \param RHSVar Variable on the right side of the reduction operation 5121 /// (references element of array in original variable). 5122 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5123 /// RHSVar. 
5124 static void EmitOMPAggregateReduction( 5125 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5126 const VarDecl *RHSVar, 5127 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5128 const Expr *, const Expr *)> &RedOpGen, 5129 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5130 const Expr *UpExpr = nullptr) { 5131 // Perform element-by-element initialization. 5132 QualType ElementTy; 5133 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5134 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5135 5136 // Drill down to the base element type on both arrays. 5137 auto ArrayTy = Type->getAsArrayTypeUnsafe(); 5138 auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5139 5140 auto RHSBegin = RHSAddr.getPointer(); 5141 auto LHSBegin = LHSAddr.getPointer(); 5142 // Cast from pointer to array type to pointer to single element. 5143 auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5144 // The basic structure here is a while-do loop. 5145 auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5146 auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5147 auto IsEmpty = 5148 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5149 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5150 5151 // Enter the loop body, making that address the current address. 
5152 auto EntryBB = CGF.Builder.GetInsertBlock(); 5153 CGF.EmitBlock(BodyBB); 5154 5155 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5156 5157 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5158 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5159 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5160 Address RHSElementCurrent = 5161 Address(RHSElementPHI, 5162 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5163 5164 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5165 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5166 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5167 Address LHSElementCurrent = 5168 Address(LHSElementPHI, 5169 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5170 5171 // Emit copy. 5172 CodeGenFunction::OMPPrivateScope Scope(CGF); 5173 Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; }); 5174 Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; }); 5175 Scope.Privatize(); 5176 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5177 Scope.ForceCleanup(); 5178 5179 // Shift the address forward by one element. 5180 auto LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5181 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5182 auto RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5183 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5184 // Check whether we've reached the end. 5185 auto Done = 5186 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5187 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5188 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5189 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5190 5191 // Done. 5192 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5193 } 5194 5195 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5196 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5197 /// UDR combiner function. 5198 static void emitReductionCombiner(CodeGenFunction &CGF, 5199 const Expr *ReductionOp) { 5200 if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5201 if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5202 if (auto *DRE = 5203 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5204 if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5205 std::pair<llvm::Function *, llvm::Function *> Reduction = 5206 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5207 RValue Func = RValue::get(Reduction.first); 5208 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5209 CGF.EmitIgnoredExpr(ReductionOp); 5210 return; 5211 } 5212 CGF.EmitIgnoredExpr(ReductionOp); 5213 } 5214 5215 llvm::Value *CGOpenMPRuntime::emitReductionFunction( 5216 CodeGenModule &CGM, SourceLocation Loc, llvm::Type *ArgsType, 5217 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, 5218 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) { 5219 auto &C = CGM.getContext(); 5220 5221 // void reduction_func(void *LHSArg, void *RHSArg); 5222 FunctionArgList Args; 5223 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5224 ImplicitParamDecl::Other); 5225 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5226 ImplicitParamDecl::Other); 5227 Args.push_back(&LHSArg); 5228 Args.push_back(&RHSArg); 5229 auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5230 auto *Fn = llvm::Function::Create( 5231 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 5232 ".omp.reduction.reduction_func", &CGM.getModule()); 5233 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5234 CodeGenFunction CGF(CGM); 5235 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 
5236 5237 // Dst = (void*[n])(LHSArg); 5238 // Src = (void*[n])(RHSArg); 5239 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5240 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5241 ArgsType), CGF.getPointerAlign()); 5242 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5243 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5244 ArgsType), CGF.getPointerAlign()); 5245 5246 // ... 5247 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5248 // ... 5249 CodeGenFunction::OMPPrivateScope Scope(CGF); 5250 auto IPriv = Privates.begin(); 5251 unsigned Idx = 0; 5252 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5253 auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5254 Scope.addPrivate(RHSVar, [&]() -> Address { 5255 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5256 }); 5257 auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5258 Scope.addPrivate(LHSVar, [&]() -> Address { 5259 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5260 }); 5261 QualType PrivTy = (*IPriv)->getType(); 5262 if (PrivTy->isVariablyModifiedType()) { 5263 // Get array size and emit VLA type. 5264 ++Idx; 5265 Address Elem = 5266 CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize()); 5267 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5268 auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy); 5269 auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5270 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5271 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5272 CGF.EmitVariablyModifiedType(PrivTy); 5273 } 5274 } 5275 Scope.Privatize(); 5276 IPriv = Privates.begin(); 5277 auto ILHS = LHSExprs.begin(); 5278 auto IRHS = RHSExprs.begin(); 5279 for (auto *E : ReductionOps) { 5280 if ((*IPriv)->getType()->isArrayType()) { 5281 // Emit reduction for array section. 
5282 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5283 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5284 EmitOMPAggregateReduction( 5285 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5286 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5287 emitReductionCombiner(CGF, E); 5288 }); 5289 } else 5290 // Emit reduction for array subscript or single variable. 5291 emitReductionCombiner(CGF, E); 5292 ++IPriv; 5293 ++ILHS; 5294 ++IRHS; 5295 } 5296 Scope.ForceCleanup(); 5297 CGF.FinishFunction(); 5298 return Fn; 5299 } 5300 5301 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5302 const Expr *ReductionOp, 5303 const Expr *PrivateRef, 5304 const DeclRefExpr *LHS, 5305 const DeclRefExpr *RHS) { 5306 if (PrivateRef->getType()->isArrayType()) { 5307 // Emit reduction for array section. 5308 auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5309 auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5310 EmitOMPAggregateReduction( 5311 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5312 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5313 emitReductionCombiner(CGF, ReductionOp); 5314 }); 5315 } else 5316 // Emit reduction for array subscript or single variable. 
5317 emitReductionCombiner(CGF, ReductionOp); 5318 } 5319 5320 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5321 ArrayRef<const Expr *> Privates, 5322 ArrayRef<const Expr *> LHSExprs, 5323 ArrayRef<const Expr *> RHSExprs, 5324 ArrayRef<const Expr *> ReductionOps, 5325 ReductionOptionsTy Options) { 5326 if (!CGF.HaveInsertPoint()) 5327 return; 5328 5329 bool WithNowait = Options.WithNowait; 5330 bool SimpleReduction = Options.SimpleReduction; 5331 5332 // Next code should be emitted for reduction: 5333 // 5334 // static kmp_critical_name lock = { 0 }; 5335 // 5336 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5337 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5338 // ... 5339 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5340 // *(Type<n>-1*)rhs[<n>-1]); 5341 // } 5342 // 5343 // ... 5344 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5345 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5346 // RedList, reduce_func, &<lock>)) { 5347 // case 1: 5348 // ... 5349 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5350 // ... 5351 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5352 // break; 5353 // case 2: 5354 // ... 5355 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5356 // ... 5357 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5358 // break; 5359 // default:; 5360 // } 5361 // 5362 // if SimpleReduction is true, only the next code is generated: 5363 // ... 5364 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5365 // ... 
5366 5367 auto &C = CGM.getContext(); 5368 5369 if (SimpleReduction) { 5370 CodeGenFunction::RunCleanupsScope Scope(CGF); 5371 auto IPriv = Privates.begin(); 5372 auto ILHS = LHSExprs.begin(); 5373 auto IRHS = RHSExprs.begin(); 5374 for (auto *E : ReductionOps) { 5375 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5376 cast<DeclRefExpr>(*IRHS)); 5377 ++IPriv; 5378 ++ILHS; 5379 ++IRHS; 5380 } 5381 return; 5382 } 5383 5384 // 1. Build a list of reduction variables. 5385 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5386 auto Size = RHSExprs.size(); 5387 for (auto *E : Privates) { 5388 if (E->getType()->isVariablyModifiedType()) 5389 // Reserve place for array size. 5390 ++Size; 5391 } 5392 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5393 QualType ReductionArrayTy = 5394 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 5395 /*IndexTypeQuals=*/0); 5396 Address ReductionList = 5397 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5398 auto IPriv = Privates.begin(); 5399 unsigned Idx = 0; 5400 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5401 Address Elem = 5402 CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize()); 5403 CGF.Builder.CreateStore( 5404 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5405 CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), 5406 Elem); 5407 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5408 // Store array size. 5409 ++Idx; 5410 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, 5411 CGF.getPointerSize()); 5412 llvm::Value *Size = CGF.Builder.CreateIntCast( 5413 CGF.getVLASize( 5414 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5415 .NumElts, 5416 CGF.SizeTy, /*isSigned=*/false); 5417 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5418 Elem); 5419 } 5420 } 5421 5422 // 2. Emit reduce_func(). 
5423 auto *ReductionFn = emitReductionFunction( 5424 CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), 5425 Privates, LHSExprs, RHSExprs, ReductionOps); 5426 5427 // 3. Create static kmp_critical_name lock = { 0 }; 5428 auto *Lock = getCriticalRegionLock(".reduction"); 5429 5430 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5431 // RedList, reduce_func, &<lock>); 5432 auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5433 auto *ThreadId = getThreadID(CGF, Loc); 5434 auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5435 auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5436 ReductionList.getPointer(), CGF.VoidPtrTy); 5437 llvm::Value *Args[] = { 5438 IdentTLoc, // ident_t *<loc> 5439 ThreadId, // i32 <gtid> 5440 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5441 ReductionArrayTySize, // size_type sizeof(RedList) 5442 RL, // void *RedList 5443 ReductionFn, // void (*) (void *, void *) <reduce_func> 5444 Lock // kmp_critical_name *&<lock> 5445 }; 5446 auto Res = CGF.EmitRuntimeCall( 5447 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 5448 : OMPRTL__kmpc_reduce), 5449 Args); 5450 5451 // 5. Build switch(res) 5452 auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5453 auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5454 5455 // 6. Build case 1: 5456 // ... 5457 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5458 // ... 
5459 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5460 // break; 5461 auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5462 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5463 CGF.EmitBlock(Case1BB); 5464 5465 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5466 llvm::Value *EndArgs[] = { 5467 IdentTLoc, // ident_t *<loc> 5468 ThreadId, // i32 <gtid> 5469 Lock // kmp_critical_name *&<lock> 5470 }; 5471 auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps]( 5472 CodeGenFunction &CGF, PrePostActionTy &Action) { 5473 auto &RT = CGF.CGM.getOpenMPRuntime(); 5474 auto IPriv = Privates.begin(); 5475 auto ILHS = LHSExprs.begin(); 5476 auto IRHS = RHSExprs.begin(); 5477 for (auto *E : ReductionOps) { 5478 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5479 cast<DeclRefExpr>(*IRHS)); 5480 ++IPriv; 5481 ++ILHS; 5482 ++IRHS; 5483 } 5484 }; 5485 RegionCodeGenTy RCG(CodeGen); 5486 CommonActionTy Action( 5487 nullptr, llvm::None, 5488 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait 5489 : OMPRTL__kmpc_end_reduce), 5490 EndArgs); 5491 RCG.setAction(Action); 5492 RCG(CGF); 5493 5494 CGF.EmitBranch(DefaultBB); 5495 5496 // 7. Build case 2: 5497 // ... 5498 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5499 // ... 
5500 // break; 5501 auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5502 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5503 CGF.EmitBlock(Case2BB); 5504 5505 auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps]( 5506 CodeGenFunction &CGF, PrePostActionTy &Action) { 5507 auto ILHS = LHSExprs.begin(); 5508 auto IRHS = RHSExprs.begin(); 5509 auto IPriv = Privates.begin(); 5510 for (auto *E : ReductionOps) { 5511 const Expr *XExpr = nullptr; 5512 const Expr *EExpr = nullptr; 5513 const Expr *UpExpr = nullptr; 5514 BinaryOperatorKind BO = BO_Comma; 5515 if (auto *BO = dyn_cast<BinaryOperator>(E)) { 5516 if (BO->getOpcode() == BO_Assign) { 5517 XExpr = BO->getLHS(); 5518 UpExpr = BO->getRHS(); 5519 } 5520 } 5521 // Try to emit update expression as a simple atomic. 5522 auto *RHSExpr = UpExpr; 5523 if (RHSExpr) { 5524 // Analyze RHS part of the whole expression. 5525 if (auto *ACO = dyn_cast<AbstractConditionalOperator>( 5526 RHSExpr->IgnoreParenImpCasts())) { 5527 // If this is a conditional operator, analyze its condition for 5528 // min/max reduction operator. 
5529 RHSExpr = ACO->getCond(); 5530 } 5531 if (auto *BORHS = 5532 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5533 EExpr = BORHS->getRHS(); 5534 BO = BORHS->getOpcode(); 5535 } 5536 } 5537 if (XExpr) { 5538 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5539 auto &&AtomicRedGen = [BO, VD, 5540 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5541 const Expr *EExpr, const Expr *UpExpr) { 5542 LValue X = CGF.EmitLValue(XExpr); 5543 RValue E; 5544 if (EExpr) 5545 E = CGF.EmitAnyExpr(EExpr); 5546 CGF.EmitOMPAtomicSimpleUpdateExpr( 5547 X, E, BO, /*IsXLHSInRHSPart=*/true, 5548 llvm::AtomicOrdering::Monotonic, Loc, 5549 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5550 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5551 PrivateScope.addPrivate( 5552 VD, [&CGF, VD, XRValue, Loc]() -> Address { 5553 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5554 CGF.emitOMPSimpleStore( 5555 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5556 VD->getType().getNonReferenceType(), Loc); 5557 return LHSTemp; 5558 }); 5559 (void)PrivateScope.Privatize(); 5560 return CGF.EmitAnyExpr(UpExpr); 5561 }); 5562 }; 5563 if ((*IPriv)->getType()->isArrayType()) { 5564 // Emit atomic reduction for array section. 5565 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5566 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5567 AtomicRedGen, XExpr, EExpr, UpExpr); 5568 } else 5569 // Emit atomic reduction for array subscript or single variable. 5570 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5571 } else { 5572 // Emit as a critical region. 
          // Fallback: no simple atomic form exists for this combiner, so wrap
          // the plain combiner expression in a named critical region.
          auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                       const Expr *, const Expr *) {
            auto &RT = CGF.CGM.getOpenMPRuntime();
            RT.emitCriticalRegion(
                CGF, ".atomic_reduction",
                [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                  Action.Enter(CGF);
                  emitReductionCombiner(CGF, E);
                },
                Loc);
          };
          if ((*IPriv)->getType()->isArrayType()) {
            // Array reduction: emit the critical-region combiner element by
            // element over the whole array.
            auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
            auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
            EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                      CritRedGen);
          } else
            CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
        ++ILHS;
        ++IRHS;
        ++IPriv;
      }
    };
    RegionCodeGenTy AtomicRCG(AtomicCodeGen);
    if (!WithNowait) {
      // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
      llvm::Value *EndArgs[] = {
          IdentTLoc, // ident_t *<loc>
          ThreadId,  // i32 <gtid>
          Lock       // kmp_critical_name *&<lock>
      };
      CommonActionTy Action(nullptr, llvm::None,
                            createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                            EndArgs);
      AtomicRCG.setAction(Action);
      AtomicRCG(CGF);
    } else
      AtomicRCG(CGF);

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    // Not an array section/subscript: the reference itself must be a
    // DeclRefExpr naming the variable.
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  // Locals/params have no mangled name; use the plain identifier for them.
  Out << Prefix << "."
      << (D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D))
      << "_" << D->getCanonicalDecl()->getLocStart().getRawEncoding();
  return Out.str();
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  // Single void* parameter: pointer to the private copy to initialize.
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    ".red_init.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    // The size was stashed in an artificial threadprivate global by
    // emitTaskReductionFixups; the unique name must match the one used there.
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No custom initializer: the shared lvalue is unused, pass a null pointer.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS,
                                           const Expr *RHS,
                                           const Expr *PrivateRef) {
  auto &C = CGM.getContext();
  // LHS/RHS are the placeholder DeclRefExprs used by the reduction op; they
  // get remapped below onto the function's two void* arguments.
  auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    ".red_comb.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() -> Address {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() -> Address {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr when the reduction item needs no cleanups, so the caller
/// can store a null function pointer instead.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  auto &C = CGM.getContext();
  FunctionArgList Args;
  // Single void* parameter: pointer to the private copy to destroy.
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    ".red_fini.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction();
  return Fn;
}

/// Emits the task-reduction initialization: builds the array of
/// kmp_task_red_input_t descriptors (one per reduction item) and calls
/// __kmpc_task_reduction_init, returning the taskgroup reduction handle.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  auto *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  // Fill one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations.
    // It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini;
    // The finalizer is optional (null when no cleanups are needed).
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 1 when creation/initialization must be delayed (VLAs,
    // array sections, custom initializers), 0 otherwise.
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}

/// Stores the runtime-only data (non-constant sizes, original item addresses)
/// for reduction item \a N into artificial threadprivate globals, so that the
/// generated .red_init./.red_comb./.red_fini. functions can load them back.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    // Names must match the ones used by emitReduceInit/Comb/FiniFunction when
    // they load these values back.
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
  // Store address of the original reduction item if custom initializer is used.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().VoidPtrTy,
        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
        SharedAddr, /*IsVolatile=*/false);
  }
}

/// Asks the runtime for the address of the current thread's private copy of a
/// task-reduction item, given the taskgroup handle and the shared item.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      ReductionsPtr,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
                                                      CGM.VoidPtrTy)};
  // The result inherits the alignment of the shared item.
  return Address(
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
      SharedLVal.getAlignment());
}

/// Emits a '#pragma omp taskwait' runtime call.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Ignore return result until untied tasks are supported.
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
  // An untied task may resume at a different part after the taskwait point.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits the body of a directive that does not require outlining (e.g. 'for',
/// 'sections') inline inside the current function.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Values of the 'cncl_kind' argument expected by the __kmpc_cancel and
/// __kmpc_cancellationpoint runtime entry points.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps the construct named in a 'cancel'/'cancellation point' directive to
/// the corresponding runtime cancellation kind.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

/// Emits a '#pragma omp cancellation point' check: calls the runtime and, if
/// cancellation was requested, branches out of the cancelled construct through
/// any active cleanups.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      auto *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
      auto *ContBB = CGF.createBasicBlock(".cancel.continue");
      auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      auto CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

/// Emits a '#pragma omp cancel' request: calls the runtime (optionally guarded
/// by the 'if' clause condition) and exits the cancelled construct when the
/// runtime reports that cancellation was activated.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      auto &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      auto *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
      auto *ContBB = CGF.createBasicBlock(".cancel.continue");
      auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      auto CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond)
      // cancel is only requested when the 'if' clause condition holds.
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

/// Emits the outlined function for a 'target' directive; thin wrapper around
/// emitTargetOutlinedFunctionHelper so subclasses can customize the process.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");

  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
    OutlinedFn->setDSOLocal(false);
  } else
    // Host side: a private byte-sized global whose address serves as the
    // unique region ID.
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// discard all CompoundStmts intervening between two constructs
static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
  while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
    Body = CS->body_front();

  return Body;
}

/// Emit the number of teams for a target directive. Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
                               CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {

  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
                                              "teams directive expected to be "
                                              "emitted only for the host!");

  auto &Bld = CGF.Builder;

  // If the target directive is combined with a teams directive:
  //   Return the value in the num_teams clause, if any.
  //   Otherwise, return 0 to denote the runtime default.
  if (isOpenMPTeamsDirective(D.getDirectiveKind())) {
    if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(),
                                         /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
                               /*IsSigned=*/true);
    }

    // The default value is 0.
    return Bld.getInt32(0);
  }

  // If the target directive is combined with a parallel directive but not a
  // teams directive, start one team.
  if (isOpenMPParallelDirective(D.getDirectiveKind()))
    return Bld.getInt32(1);

  // If the current target region has a teams region enclosed, we need to get
  // the number of teams to pass to the runtime function call. This is done
  // by generating the expression in an inlined region. This is required because
  // the expression is captured in the enclosing target environment when the
  // teams directive is not combined with target.

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
          ignoreCompoundStmts(CS.getCapturedStmt()))) {
    if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
      if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
        return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
                                 /*IsSigned=*/true);
      }

      // If we have an enclosed teams directive but no num_teams clause we use
      // the default value 0.
      return Bld.getInt32(0);
    }
  }

  // No teams associated with the directive.
  return nullptr;
}

/// Emit the number of threads for a target directive. Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
                                 CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {

  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
                                              "teams directive expected to be "
                                              "emitted only for the host!");

  auto &Bld = CGF.Builder;

  //
  // If the target directive is combined with a teams directive:
  //   Return the value in the thread_limit clause, if any.
  //
  // If the target directive is combined with a parallel directive:
  //   Return the value in the num_threads clause, if any.
  //
  // If both clauses are set, select the minimum of the two.
  //
  // If neither teams nor parallel combined directives set the number of threads
  // in a team, return 0 to denote the runtime default.
  //
  // If this is not a teams directive return nullptr.

  if (isOpenMPTeamsDirective(D.getDirectiveKind()) ||
      isOpenMPParallelDirective(D.getDirectiveKind())) {
    llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0);
    llvm::Value *NumThreadsVal = nullptr;
    llvm::Value *ThreadLimitVal = nullptr;

    if (const auto *ThreadLimitClause =
            D.getSingleClause<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(),
                                            /*IgnoreResultAssign*/ true);
      ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty,
                                         /*IsSigned=*/true);
    }

    if (const auto *NumThreadsClause =
            D.getSingleClause<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      llvm::Value *NumThreads =
          CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                             /*IgnoreResultAssign*/ true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true);
    }

    // Select the lesser of thread_limit and num_threads.
    if (NumThreadsVal)
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;

    // Set default value passed to the runtime if either teams or a target
    // parallel type directive is found but no clause is specified.
    if (!ThreadLimitVal)
      ThreadLimitVal = DefaultThreadLimitVal;

    return ThreadLimitVal;
  }

  // If the current target region has a teams region enclosed, we need to get
  // the thread limit to pass to the runtime function call. This is done
  // by generating the expression in an inlined region. This is required because
  // the expression is captured in the enclosing target environment when the
  // teams directive is not combined with target.

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
          ignoreCompoundStmts(CS.getCapturedStmt()))) {
    if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
      if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
        return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
                                         /*IsSigned=*/true);
      }

      // If we have an enclosed teams directive but no thread_limit clause we
      // use the default value 0.
      return CGF.Builder.getInt32(0);
    }
  }

  // No teams associated with the directive.
  return nullptr;
}

namespace {
// \brief Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// \brief Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags {
    /// \brief Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// \brief Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// \brief Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// \brief Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// \brief The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// \brief This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// \brief Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// \brief This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// \brief Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
  };

  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy;
  typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy;
  typedef SmallVector<uint64_t, 16> MapFlagsArrayTy;

private:
  /// \brief Directive from where the map clauses were extracted.
  const OMPExecutableDirective &CurDir;

  /// \brief Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// \brief Set of all first private variables in the current directive.
  llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;
  /// Set of all reduction variables in the current directive.
  llvm::SmallPtrSet<const VarDecl *, 8> ReductionDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// \brief Return the runtime size (in bytes, as an llvm::Value) of the
  /// entity designated by expression \a E for mapping purposes. Array
  /// sections are sized as length * element-size; references are sized by
  /// their pointee type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    auto ExprTy = E->getType().getCanonicalType();

    // Reference types are ignored for mapping purposes.
    if (auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression, that means we
      // are using the whole length of the base.
      if (!OAE->getLength() && OAE->getColonLoc().isValid())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (auto *PTy = BaseTy->getAs<PointerType>())
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      else {
        auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        // NOTE(review): cast<> already asserts non-null; this assert is
        // redundant but kept to document the invariant.
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength())
        return ElemSize;

      auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
      LengthVal =
          CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
      return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// \brief Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  uint64_t getMapTypeBits(OpenMPMapClauseKind MapType,
                          OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag,
                          bool AddIsTargetParamFlag) const {
    uint64_t Bits = 0u;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release is the default behavior in the runtime library, i.e.
      // if we don't pass any bits alloc/release that is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits = OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits = OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits = OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits = OMP_MAP_DELETE;
      break;
    default:
      llvm_unreachable("Unexpected map type!");
      break;
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (MapTypeModifier == OMPC_MAP_always)
      Bits |= OMP_MAP_ALWAYS;
    return Bits;
  }

  /// \brief Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLoc().isInvalid())
      return false;

    auto *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                         OASE->getBase()->IgnoreParenImpCasts())
                         .getCanonicalType();
      if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's user fault.
      return true;
    }

    // Check if the length evaluates to 1.
    llvm::APSInt ConstLength;
    if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
      return true; // Can have more than size 1.

    return ConstLength.getSExtValue() != 1;
  }

  /// \brief Return the adjusted map modifiers if the declaration a capture
  /// refers to appears in a first-private clause. This is expected to be used
  /// only with directives that start with 'target'.
  unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap,
                                               unsigned CurrentModifiers) {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar()))
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    // Reduction variable will use only the 'private ptr' and 'map to_from'
    // flag.
    if (ReductionDecls.count(Cap.getCapturedVar())) {
      return MappableExprsHandler::OMP_MAP_TO |
             MappableExprsHandler::OMP_MAP_FROM;
    }

    // We didn't modify anything.
    return CurrentModifiers;
  }

public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.insert(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
    // Extract reduction clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPReductionClause>()) {
      for (const auto *D : C->varlists()) {
        ReductionDecls.insert(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[L.first].push_back(L.second);
  }

  /// \brief Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      bool IsFirstComponentList, bool IsImplicit) const {

    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), noflags
    //
    // map(i)
    // &i, &i, 100*sizeof(int), noflags
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), noflags
    //
    // map(p)
    // &p, &p, sizeof(float*), noflags
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), noflags
    //
    // map(s)
    // &s, &s, sizeof(S2), noflags
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), noflags
    //
    // map(s.s.f)
    // &s, &(s.s.f), 50*sizeof(float), noflags
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), noflags
    //
    // map(s.p[:22], s.a, s.b)
    // &s, &(s.p), sizeof(double*), noflags
    // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    //
    // map(s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag
    //
    // map(s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag
    //
    // map(s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), noflags
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), noflags
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), noflags
    //
    // map(ps->p)
    // ps, &(ps->p), sizeof(double*), noflags
    //
    // map(ps->p[:22])
    // ps, &(ps->p), sizeof(double*), noflags
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    //
    // map(ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag
    //
    // map(ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag
    //
    // map(ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    bool IsLink = false; // Is this variable a "declare target link"?

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    llvm::Value *BP = nullptr;

    if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.EmitScalarExpr(ME->getBase());
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
            isDeclareTargetDeclaration(VD)) {
          assert(*Res == OMPDeclareTargetDeclAttr::MT_Link &&
                 "Declare target link is expected.");
          // Avoid warning in release build.
          (void)*Res;
          // 'declare target link' variables are mapped through a runtime
          // allocated pointer, not their host address.
          IsLink = true;
          BP = CGF.CGM.getOpenMPRuntime()
                   .getAddrOfDeclareTargetLink(VD)
                   .getPointer();
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty);
        BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(),
                                         Ty->castAs<PointerType>())
                 .getPointer();

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Implicit maps get the implicit bit set on every generated entry.
    uint64_t DefaultFlags = IsImplicit ? OMP_MAP_IMPLICIT : 0;
    for (; I != CE; ++I) {
      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE &&
           OMPArraySectionExpr::getBaseOriginalType(OASE)
               .getCanonicalType()
               ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      if (Next == CE || IsPointer || IsFinalArraySection) {

        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        llvm::Value *LB =
            CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer();
        auto *Size = getExprTypeSize(I->getAssociatedExpression());

        // If we have a member expression and the current component is a
        // reference, we have to map the reference too. Whenever we have a
        // reference, the section that reference refers to is going to be a
        // load instruction from the storage assigned to the reference.
        if (isa<MemberExpr>(I->getAssociatedExpression()) &&
            I->getAssociatedDeclaration()->getType()->isReferenceType()) {
          auto *LI = cast<llvm::LoadInst>(LB);
          auto *RefAddr = LI->getPointerOperand();

          // Emit an 'alloc' entry for the reference storage itself.
          BasePointers.push_back(BP);
          Pointers.push_back(RefAddr);
          Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
          Types.push_back(DefaultFlags |
                          getMapTypeBits(
                              /*MapType*/ OMPC_MAP_alloc,
                              /*MapTypeModifier=*/OMPC_MAP_unknown,
                              !IsExpressionFirstInfo, IsCaptureFirstInfo));
          IsExpressionFirstInfo = false;
          IsCaptureFirstInfo = false;
          // The reference will be the next base address.
          BP = RefAddr;
        }

        BasePointers.push_back(BP);
        Pointers.push_back(LB);
        Sizes.push_back(Size);

        // We need to add a pointer flag for each map that comes from the
        // same expression except for the first one. We also need to signal
        // this map is the first one that relates with the current capture
        // (there is a set of entries for each capture).
        Types.push_back(DefaultFlags |
                        getMapTypeBits(MapType, MapTypeModifier,
                                       !IsExpressionFirstInfo || IsLink,
                                       IsCaptureFirstInfo && !IsLink));

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }

  /// \brief Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions. Also, for each item that
  /// relates with a device pointer, a pair of the relevant declaration and
  /// index where it occurs is appended to the device pointers info array.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    BasePointers.clear();
    Pointers.clear();
    Sizes.clear();
    Types.clear();

    struct MapInfo {
      /// Kind that defines how a device pointer has to be returned.
      enum ReturnPointerKind {
        // Don't have to return any pointer.
        RPK_None,
        // Pointer is the base of the declaration.
        RPK_Base,
        // Pointer is a member of the base declaration - 'this'
        RPK_Member,
        // Pointer is a reference and a member of the base declaration - 'this'
        RPK_MemberReference,
      };
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
      OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown;
      ReturnPointerKind ReturnDevicePointer = RPK_None;
      bool IsImplicit = false;

      MapInfo() = default;
      MapInfo(
          OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
          OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
          ReturnPointerKind ReturnDevicePointer, bool IsImplicit)
          : Components(Components), MapType(MapType),
            MapTypeModifier(MapTypeModifier),
            ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
    };

    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
        MapInfo::ReturnPointerKind ReturnDevicePointer, bool IsImplicit) {
      // Canonicalize the declaration so lookups are stable; 'this' keeps a
      // null key.
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer,
                            IsImplicit);
    };

    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
      for (auto L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
                MapInfo::RPK_None, C->isImplicit());
      }
    for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
      for (auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
                MapInfo::RPK_None, C->isImplicit());
      }
    for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
      for (auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
                MapInfo::RPK_None, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (auto *C : this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>())
      for (auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        auto *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = isa<MemberExpr>(IE)
                                          ? (VD->getType()->isReferenceType()
                                                 ? MapInfo::RPK_MemberReference
                                                 : MapInfo::RPK_Member)
                                          : MapInfo::RPK_Base;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section.
        // FIXME: MSVC 2013 seems to require this-> to find member CGF.
        llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(IE),
                                                      IE->getExprLoc());
        BasePointers.push_back({Ptr, VD});
        Pointers.push_back(Ptr);
        Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
        Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
      }

    for (auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;
      for (MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = BasePointers.size();
        // FIXME: MSVC 2013 seems to require this-> to find the member method.
        this->generateInfoForComponentList(
            L.MapType, L.MapTypeModifier, L.Components, BasePointers, Pointers,
            Sizes, Types, IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (IsFirstComponentList &&
            L.ReturnDevicePointer != MapInfo::RPK_None) {
          // If the pointer is not the base of the map, we need to skip the
          // base. If it is a reference in a member field, we also need to skip
          // the map of the reference.
          if (L.ReturnDevicePointer != MapInfo::RPK_Base) {
            ++CurrentBasePointersIdx;
            if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference)
              ++CurrentBasePointersIdx;
          }
          assert(BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          auto *RelevantVD = L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }
    }
  }

  /// \brief Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes,
                              MapFlagsArrayTy &Types) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    BasePointers.clear();
    Pointers.clear();
    Sizes.clear();
    Types.clear();

    // We need to know when we are generating information for the first
    // component associated with a capture, because the mapping flags depend
    // on it.
    bool IsFirstComponentList = true;

    // 'this' captures use a null key in DevPointersMap.
    const ValueDecl *VD =
        Cap->capturesThis()
            ? nullptr
            : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value, otherwise, if it is a member expression, we need to map
    // 'to' the field.
    if (!VD) {
      auto It = DevPointersMap.find(VD);
      if (It != DevPointersMap.end()) {
        for (auto L : It->second) {
          generateInfoForComponentList(
              /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L,
              BasePointers, Pointers, Sizes, Types, IsFirstComponentList,
              /*IsImplicit=*/false);
          IsFirstComponentList = false;
        }
        return;
      }
    } else if (DevPointersMap.count(VD)) {
      BasePointers.push_back({Arg, VD});
      Pointers.push_back(Arg);
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
      for (auto L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        generateInfoForComponentList(
            C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers,
            Pointers, Sizes, Types, IsFirstComponentList, C->isImplicit());
        IsFirstComponentList = false;
      }

    return;
  }

  /// \brief Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) {

    // Do the default mapping.
    if (CI.capturesThis()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      // 'this' is mapped by the size of the pointed-to object.
      CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.getTypeSize(RI.getType()));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(0u);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
      }
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);

      const ReferenceType *PtrTy =
          cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.getTypeSize(ElementType));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.emplace_back(adjustMapModifiersForPrivateClauses(
          CI, ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM)
                                             : OMP_MAP_TO));
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
  }
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// \brief Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
// Sentinel device id: tells libomptarget to pick the device itself.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// \brief Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Four parallel arrays are produced (base pointers, pointers, sizes, map
/// types) and recorded in \p Info. The pointer arrays are always stack
/// temporaries filled with stores; the sizes array is a private constant
/// global when every size is a compile-time constant, and the map-types
/// array is always a private constant global.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  auto &CGM = CGF.CGM;
  auto &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (auto *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType =
        Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0);

    // Stack temporaries for the base-pointer and pointer arrays; they are
    // populated element-by-element in the loop below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (auto S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, ".offload_sizes");
      // unnamed_addr: identical constant arrays may be merged by the linker.
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, ".offload_maptypes");
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Store each base pointer, pointer and (if needed) size into its slot.
    for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) {
      llvm::Value *BPVal = *BasePointers[i];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, i);
      // Cast the slot to the stored value's pointer type so the store below
      // type-checks; the array itself is i8* typed.
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where a use_device_ptr variable's device address lives, so the
      // region body can be privatized with it.
      if (Info.requiresDevicePointerInfo())
        if (auto *DevVD = BasePointers[i].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr));

      llvm::Value *PVal = Pointers[i];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, i);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Sizes only need runtime stores when at least one size is not a
      // compile-time constant (the constant-global case above needs none).
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/i);
        Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
/// \brief Emit
/// the arguments to be passed to the runtime library based on the
/// arrays of pointers, sizes and map types.
///
/// Decays each array recorded in \p Info to a pointer to its first element
/// (the form the __tgt_* entry points take); when there are no pointers at
/// all, typed null pointers are produced instead.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
  auto &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // GEP to element [0][0] of each array, i.e. decay array-of-N to pointer.
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // Map-type flags are 64-bit entries (matches the i64 element type used
    // when the .offload_maptypes constant is built).
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
  } else {
    // No captures/maps: pass typed null pointers to the runtime.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
  }
}

/// \brief Emit the target offloading call for a 'target' construct: fill the
/// offloading arrays, call the __tgt_target* entry point and fall back to the
/// host outlined function if the offload fails (or no device binary exists).
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Value *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' clause forces the region to be wrapped in an outer task.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS =
*D.getCapturedStmt(OMPD_target);
  // Materialize the captured variables before branching on the if clause.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask,
                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime choose (-1 sentinel).
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    // Both are null when there is no enclosed (or combined) teams region.
    auto *NumTeams = emitNumTeamsForTargetDirective(*this, CGF, D);
    auto *NumThreads = emitNumThreadsForTargetDirective(*this, CGF, D);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    // Non-zero return from __tgt_target* means the offload did not run.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // Inside the outer task the captures must be re-materialized in the
      // task's context before calling the host fallback.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  // Host-only path: run the outlined host function directly (used when no
  // offload entry exists or the if clause evaluates to false).
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      // Re-materialize captures in the enclosing task's context.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars);
  };

  // Builds the offloading arrays for every capture (plus 'declare target
  // link' map-clause items) and then runs ThenGen, either inlined or wrapped
  // in a target task when a depend clause is present.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Per-capture scratch arrays, cleared on every loop iteration.
    MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
    MappableExprsHandler::MapValuesArrayTy CurPointers;
    MappableExprsHandler::MapValuesArrayTy CurSizes;
    MappableExprsHandler::MapFlagsArrayTy CurMapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);

    // Walk captures, captured-record fields and captured values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      CurBasePointers.clear();
      CurPointers.clear();
      CurSizes.clear();
      CurMapTypes.clear();

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.getTypeSize(RI->getType()));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    for (const auto *C : D.getClausesOfKind<OMPMapClause>()) {
      for (auto L : C->component_lists()) {
        if (!L.first)
          continue;
        const auto *VD = dyn_cast<VarDecl>(L.first);
        if (!VD)
          continue;
        // Only 'declare target link' globals need explicit entries here;
        // other declare-target kinds are skipped.
        llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
            isDeclareTargetDeclaration(VD);
        if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
          continue;
        MEHandler.generateInfoForComponentList(
            C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers,
            Pointers, Sizes, MapTypes, /*IsFirstComponentList=*/true,
            C->isImplicit());
      }
    }

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish results into the variables captured by ThenGen.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only counterpart of TargetThenGen: no arrays to build, just run (or
  // task-wrap) the host fallback.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host.
// We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user did not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    // No offload entry for this region: unconditionally run the host version.
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

/// Recursively walk \p S looking for OpenMP target execution directives and
/// emit their device functions. \p ParentName is the mangled name of the
/// enclosing host function, used to build unique offload entry names.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool requiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (requiresDeviceCodegen) {
    auto &E = *cast<OMPExecutableDirective>(S);
    // Unique identifier of the target region: (device id, file id, parent
    // function, line number).
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter matching the directive kind.
    switch (S->getStmtClass()) {
    case Stmt::OMPTargetDirectiveClass:
      CodeGenFunction::EmitOMPTargetDeviceFunction(
          CGM, ParentName, cast<OMPTargetDirective>(*S));
      break;
    case Stmt::OMPTargetParallelDirectiveClass:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(*S));
      break;
    case Stmt::OMPTargetTeamsDirectiveClass:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(*S));
      break;
    case Stmt::OMPTargetTeamsDistributeDirectiveClass:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(*S));
      break;
    case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(*S));
      break;
    case Stmt::OMPTargetParallelForDirectiveClass:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(*S));
      break;
    case Stmt::OMPTargetParallelForSimdDirectiveClass:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(*S));
      break;
    case Stmt::OMPTargetSimdDirectiveClass:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(*S));
      break;
    case Stmt::OMPTargetTeamsDistributeParallelForDirectiveClass:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(*S));
      break;
    case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(*S));
      break;
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    // Non-target directive: scan its captured body instead of its children.
    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (auto *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

/// Device-side hook for function emission.
/// \return true if \p GD should NOT be emitted through the normal path (i.e.
/// we are compiling for a device and the function is not declare target).
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  auto &FD = *cast<FunctionDecl>(GD.getDecl());

  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Try to detect target regions in the function.
  scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD));

  // Do not emit the function if it is not marked as declare target.
  return !isDeclareTargetDeclaration(&FD);
}

/// Device-side hook for global-variable emission.
/// \return true if \p GD should NOT be emitted on the device.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    // Constructors/destructors of a class-typed global may themselves contain
    // target regions; scan them under their complete-variant mangled names.
    for (auto *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    auto *Dtor = RD->getDestructor();
    if (Dtor) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      isDeclareTargetDeclaration(cast<VarDecl>(GD.getDecl()));
  return !Res || *Res == OMPDeclareTargetDeclAttr::MT_Link;
}

/// Record a 'declare target' global variable in the offload entries table so
/// host and device agree on its name, size and linkage.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
          isDeclareTargetDeclaration(VD)) {
    OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
    StringRef VarName;
    CharUnits VarSize;
    llvm::GlobalValue::LinkageTypes Linkage;
    switch (*Res) {
    case OMPDeclareTargetDeclAttr::MT_To:
      // 'to': register the variable itself with its real size and linkage.
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
      VarName = CGM.getMangledName(VD);
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
      break;
    case OMPDeclareTargetDeclAttr::MT_Link:
      // Map type 'to' because we do not map the original variable but the
      // reference.
  if (const auto *Method = dyn_cast<CXXMethodDecl>(FD))
    if (!Method->isStatic())
      return true;

  // insert() returns false in .second when FD was already present, i.e. the
  // function was marked before.
  return !AlreadyEmittedTargetFunctions.insert(FD).second;
}

/// Emit and return the module's offload registration function (offload entries
/// metadata plus the binary descriptor registration).
llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
  // If we have offloading in the current module, we need to emit the entries
  // now and register the offloading descriptor.
  createOffloadEntriesAndInfoMetadata();

  // Create and register the offloading binary descriptors. This is the main
  // entity that captures all the information about offloading in the current
  // compilation unit.
  return createOffloadingBinaryDescriptorRegistration();
}

/// Emit the __kmpc_fork_teams call that launches the outlined teams region.
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Value *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  auto *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

/// Emit the __kmpc_push_num_teams call for num_teams/thread_limit clauses.
/// A null clause expression is lowered to 0, which tells the runtime to use
/// its default.
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  auto *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      (NumTeams)
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      (ThreadLimit)
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

/// Emit the paired __tgt_target_data_begin/__tgt_target_data_end calls of a
/// 'target data' region around the region body \p CodeGen, honoring the if
/// and device clauses.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    // Info was populated by BeginThenGen; the end call reuses its arrays.
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

/// Emit the runtime call for a standalone data-movement directive
/// ('target enter data', 'target exit data' or 'target update').
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
8012 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 8013 OpenMPRTLFunction RTLFn; 8014 switch (D.getDirectiveKind()) { 8015 default: 8016 llvm_unreachable("Unexpected standalone target data directive."); 8017 break; 8018 case OMPD_target_enter_data: 8019 RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait 8020 : OMPRTL__tgt_target_data_begin; 8021 break; 8022 case OMPD_target_exit_data: 8023 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait 8024 : OMPRTL__tgt_target_data_end; 8025 break; 8026 case OMPD_target_update: 8027 RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait 8028 : OMPRTL__tgt_target_data_update; 8029 break; 8030 } 8031 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); 8032 }; 8033 8034 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 8035 CodeGenFunction &CGF, PrePostActionTy &) { 8036 // Fill up the arrays with all the mapped variables. 8037 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 8038 MappableExprsHandler::MapValuesArrayTy Pointers; 8039 MappableExprsHandler::MapValuesArrayTy Sizes; 8040 MappableExprsHandler::MapFlagsArrayTy MapTypes; 8041 8042 // Get map clause information. 8043 MappableExprsHandler MEHandler(D, CGF); 8044 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 8045 8046 TargetDataInfo Info; 8047 // Fill up the arrays and create the arguments. 
8048 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 8049 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 8050 Info.PointersArray, Info.SizesArray, 8051 Info.MapTypesArray, Info); 8052 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 8053 InputInfo.BasePointersArray = 8054 Address(Info.BasePointersArray, CGM.getPointerAlign()); 8055 InputInfo.PointersArray = 8056 Address(Info.PointersArray, CGM.getPointerAlign()); 8057 InputInfo.SizesArray = 8058 Address(Info.SizesArray, CGM.getPointerAlign()); 8059 MapTypesArray = Info.MapTypesArray; 8060 if (D.hasClausesOfKind<OMPDependClause>()) 8061 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 8062 else 8063 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 8064 }; 8065 8066 if (IfCond) 8067 emitOMPIfClause(CGF, IfCond, TargetThenGen, 8068 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 8069 else { 8070 RegionCodeGenTy ThenRCG(TargetThenGen); 8071 ThenRCG(CGF); 8072 } 8073 } 8074 8075 namespace { 8076 /// Kind of parameter in a function with 'declare simd' directive. 8077 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 8078 /// Attribute set of the parameter. 8079 struct ParamAttrTy { 8080 ParamKindTy Kind = Vector; 8081 llvm::APSInt StrideOrArg; 8082 llvm::APSInt Alignment; 8083 }; 8084 } // namespace 8085 8086 static unsigned evaluateCDTSize(const FunctionDecl *FD, 8087 ArrayRef<ParamAttrTy> ParamAttrs) { 8088 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 8089 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 8090 // of that clause. The VLEN value must be power of 2. 8091 // In other case the notion of the function`s "characteristic data type" (CDT) 8092 // is used to compute the vector length. 8093 // CDT is defined in the following order: 8094 // a) For non-void function, the CDT is the return type. 
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //   CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //   type which is pass-by-value (except for the type that maps to the
  //   built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of vector
  // register of that ISA for which current vector version is generated. The
  // VLEN is computed using the formula below:
  //   VLEN  = sizeof(vector_register) / sizeof(CDT),
  // where vector register size specified in section 3.2.1 Registers and the
  // Stack Frame of original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType())
    CDT = RetType;
  else {
    unsigned Offset = 0;
    // For non-static member functions, slot 0 is the implicit 'this'
    // parameter; real parameters start at slot 1.
    if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    // Case b): first parameter that is neither uniform nor linear.
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  // Cases c) and d): fall back to int.
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

/// Emit the x86-specific 'declare simd' mangled vector-variant names
/// (e.g. "_ZGVbN4v_foo") as function attributes on \p Fn.
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  // ISA letter and vector register width for each x86 vector extension.
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy
ISAData[] = { 8148 { 8149 'b', 128 8150 }, // SSE 8151 { 8152 'c', 256 8153 }, // AVX 8154 { 8155 'd', 256 8156 }, // AVX2 8157 { 8158 'e', 512 8159 }, // AVX512 8160 }; 8161 llvm::SmallVector<char, 2> Masked; 8162 switch (State) { 8163 case OMPDeclareSimdDeclAttr::BS_Undefined: 8164 Masked.push_back('N'); 8165 Masked.push_back('M'); 8166 break; 8167 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 8168 Masked.push_back('N'); 8169 break; 8170 case OMPDeclareSimdDeclAttr::BS_Inbranch: 8171 Masked.push_back('M'); 8172 break; 8173 } 8174 for (auto Mask : Masked) { 8175 for (auto &Data : ISAData) { 8176 SmallString<256> Buffer; 8177 llvm::raw_svector_ostream Out(Buffer); 8178 Out << "_ZGV" << Data.ISA << Mask; 8179 if (!VLENVal) { 8180 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / 8181 evaluateCDTSize(FD, ParamAttrs)); 8182 } else 8183 Out << VLENVal; 8184 for (auto &ParamAttr : ParamAttrs) { 8185 switch (ParamAttr.Kind){ 8186 case LinearWithVarStride: 8187 Out << 's' << ParamAttr.StrideOrArg; 8188 break; 8189 case Linear: 8190 Out << 'l'; 8191 if (!!ParamAttr.StrideOrArg) 8192 Out << ParamAttr.StrideOrArg; 8193 break; 8194 case Uniform: 8195 Out << 'u'; 8196 break; 8197 case Vector: 8198 Out << 'v'; 8199 break; 8200 } 8201 if (!!ParamAttr.Alignment) 8202 Out << 'a' << ParamAttr.Alignment; 8203 } 8204 Out << '_' << Fn->getName(); 8205 Fn->addFnAttr(Out.str()); 8206 } 8207 } 8208 } 8209 8210 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 8211 llvm::Function *Fn) { 8212 ASTContext &C = CGM.getContext(); 8213 FD = FD->getMostRecentDecl(); 8214 // Map params to their positions in function decl. 
8215 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 8216 if (isa<CXXMethodDecl>(FD)) 8217 ParamPositions.insert({FD, 0}); 8218 unsigned ParamPos = ParamPositions.size(); 8219 for (auto *P : FD->parameters()) { 8220 ParamPositions.insert({P->getCanonicalDecl(), ParamPos}); 8221 ++ParamPos; 8222 } 8223 while (FD) { 8224 for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 8225 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 8226 // Mark uniform parameters. 8227 for (auto *E : Attr->uniforms()) { 8228 E = E->IgnoreParenImpCasts(); 8229 unsigned Pos; 8230 if (isa<CXXThisExpr>(E)) 8231 Pos = ParamPositions[FD]; 8232 else { 8233 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 8234 ->getCanonicalDecl(); 8235 Pos = ParamPositions[PVD]; 8236 } 8237 ParamAttrs[Pos].Kind = Uniform; 8238 } 8239 // Get alignment info. 8240 auto NI = Attr->alignments_begin(); 8241 for (auto *E : Attr->aligneds()) { 8242 E = E->IgnoreParenImpCasts(); 8243 unsigned Pos; 8244 QualType ParmTy; 8245 if (isa<CXXThisExpr>(E)) { 8246 Pos = ParamPositions[FD]; 8247 ParmTy = E->getType(); 8248 } else { 8249 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 8250 ->getCanonicalDecl(); 8251 Pos = ParamPositions[PVD]; 8252 ParmTy = PVD->getType(); 8253 } 8254 ParamAttrs[Pos].Alignment = 8255 (*NI) 8256 ? (*NI)->EvaluateKnownConstInt(C) 8257 : llvm::APSInt::getUnsigned( 8258 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 8259 .getQuantity()); 8260 ++NI; 8261 } 8262 // Mark linear parameters. 
8263 auto SI = Attr->steps_begin(); 8264 auto MI = Attr->modifiers_begin(); 8265 for (auto *E : Attr->linears()) { 8266 E = E->IgnoreParenImpCasts(); 8267 unsigned Pos; 8268 if (isa<CXXThisExpr>(E)) 8269 Pos = ParamPositions[FD]; 8270 else { 8271 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 8272 ->getCanonicalDecl(); 8273 Pos = ParamPositions[PVD]; 8274 } 8275 auto &ParamAttr = ParamAttrs[Pos]; 8276 ParamAttr.Kind = Linear; 8277 if (*SI) { 8278 if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C, 8279 Expr::SE_AllowSideEffects)) { 8280 if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 8281 if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 8282 ParamAttr.Kind = LinearWithVarStride; 8283 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 8284 ParamPositions[StridePVD->getCanonicalDecl()]); 8285 } 8286 } 8287 } 8288 } 8289 ++SI; 8290 ++MI; 8291 } 8292 llvm::APSInt VLENVal; 8293 if (const Expr *VLEN = Attr->getSimdlen()) 8294 VLENVal = VLEN->EvaluateKnownConstInt(C); 8295 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 8296 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 8297 CGM.getTriple().getArch() == llvm::Triple::x86_64) 8298 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 8299 } 8300 FD = FD->getPreviousDecl(); 8301 } 8302 } 8303 8304 namespace { 8305 /// Cleanup action for doacross support. 
/// EH-scope cleanup that emits the __kmpc_doacross_fini runtime call when the
/// doacross loop region is exited (normally or via exception).
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  // Finalization runtime function (__kmpc_doacross_fini) and its two
  // pre-computed arguments (location, thread id).
  llvm::Value *RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

/// Emit the __kmpc_doacross_init call for an ordered loop with doacross
/// dependences, and register a cleanup that emits __kmpc_doacross_fini.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // KmpDimTy is built once and cached on the runtime object.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());

  Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, KmpDimTy);
  // Field indices into kmp_dim; must match the declaration order above.
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy);
  // dims.upper = num_iterations; (lower stays at its null-initialized 0)
  LValue UpperLVal =
      CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD));
  llvm::Value *NumIterVal = CGF.EmitScalarConversion(
      CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(),
      Int64Ty, D.getNumIterations()->getExprLoc());
  CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
  // dims.stride = 1;
  LValue StrideLVal =
      CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                        StrideLVal);

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()),
                         getThreadID(CGF, D.getLocStart()),
                         llvm::ConstantInt::getSigned(CGM.Int32Ty, 1),
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             DimsAddr.getPointer(), CGM.VoidPtrTy)};

  llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register a cleanup so __kmpc_doacross_fini runs on both normal and EH
  // exits from the region.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())};
  llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

/// Emit __kmpc_doacross_post (for 'depend(source)') or __kmpc_doacross_wait
/// (for 'depend(sink:...)') with the dependence counter value.
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  const Expr *CounterVal = C->getCounterValue();
  assert(CounterVal);
  llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal),
                                                 CounterVal->getType(), Int64Ty,
                                                 CounterVal->getExprLoc());
  Address CntAddr =
      CGF.CreateMemTemp(Int64Ty, ".cnt.addr");
  // The runtime takes the counter by pointer, so spill it to a temporary.
  CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()),
                         getThreadID(CGF, C->getLocStart()),
                         CntAddr.getPointer()};
  llvm::Value *RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source)
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
  else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

/// Emit a call to \p Callee at \p Loc, using the nounwind fast path when the
/// callee is a function known not to throw.
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::Value *Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee)) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

/// Default implementation simply forwards to emitCall; subclasses may
/// override to adjust how outlined functions are invoked.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

/// Default implementation: the native parameter's local address is used
/// directly (no translation between native and target parameters).
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Default implementation: no runtime-specific storage for local variables.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  return Address::invalid();
}

llvm::Value *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value
*CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// The CGOpenMPSIMDRuntime overrides below are stubs: in SIMD-only mode only
// simd-related constructs are supported, so every other runtime entry point
// aborts with llvm_unreachable (or returns a trivial value where a result is
// required).

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Value *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Value *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Unlike the other overrides, simple reductions are supported in SIMD-only
// mode and are delegated to the base-class implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         llvm::Value *OutlinedFn,
                                         llvm::Value *OutlinedFnID,
                                         const Expr *IfCond,
                                         const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// No target globals exist in SIMD-only mode, so nothing was emitted.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

// No offloading registration is needed in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
  return nullptr;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Value *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
