1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This provides a class for OpenMP runtime code generation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGOpenMPRuntime.h" 17 #include "CGRecordLayout.h" 18 #include "CodeGenFunction.h" 19 #include "clang/CodeGen/ConstantInitBuilder.h" 20 #include "clang/AST/Decl.h" 21 #include "clang/AST/StmtOpenMP.h" 22 #include "llvm/ADT/ArrayRef.h" 23 #include "llvm/ADT/BitmaskEnum.h" 24 #include "llvm/Bitcode/BitcodeReader.h" 25 #include "llvm/IR/CallSite.h" 26 #include "llvm/IR/DerivedTypes.h" 27 #include "llvm/IR/GlobalValue.h" 28 #include "llvm/IR/Value.h" 29 #include "llvm/Support/Format.h" 30 #include "llvm/Support/raw_ostream.h" 31 #include <cassert> 32 33 using namespace clang; 34 using namespace CodeGen; 35 36 namespace { 37 /// \brief Base class for handling code generation inside OpenMP regions. 38 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 39 public: 40 /// \brief Kinds of OpenMP regions used in codegen. 41 enum CGOpenMPRegionKind { 42 /// \brief Region with outlined function for standalone 'parallel' 43 /// directive. 44 ParallelOutlinedRegion, 45 /// \brief Region with outlined function for standalone 'task' directive. 46 TaskOutlinedRegion, 47 /// \brief Region for constructs that do not require function outlining, 48 /// like 'for', 'sections', 'atomic' etc. directives. 49 InlinedRegion, 50 /// \brief Region with outlined function for standalone 'target' directive. 
51 TargetRegion, 52 }; 53 54 CGOpenMPRegionInfo(const CapturedStmt &CS, 55 const CGOpenMPRegionKind RegionKind, 56 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 57 bool HasCancel) 58 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 59 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 60 61 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 62 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 63 bool HasCancel) 64 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 65 Kind(Kind), HasCancel(HasCancel) {} 66 67 /// \brief Get a variable or parameter for storing global thread id 68 /// inside OpenMP construct. 69 virtual const VarDecl *getThreadIDVariable() const = 0; 70 71 /// \brief Emit the captured statement body. 72 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 73 74 /// \brief Get an LValue for the current ThreadID variable. 75 /// \return LValue for thread id variable. This LValue always has type int32*. 76 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 77 78 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 79 80 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 81 82 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 83 84 bool hasCancel() const { return HasCancel; } 85 86 static bool classof(const CGCapturedStmtInfo *Info) { 87 return Info->getKind() == CR_OpenMP; 88 } 89 90 ~CGOpenMPRegionInfo() override = default; 91 92 protected: 93 CGOpenMPRegionKind RegionKind; 94 RegionCodeGenTy CodeGen; 95 OpenMPDirectiveKind Kind; 96 bool HasCancel; 97 }; 98 99 /// \brief API for captured statement code generation in OpenMP constructs. 
100 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 101 public: 102 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 103 const RegionCodeGenTy &CodeGen, 104 OpenMPDirectiveKind Kind, bool HasCancel, 105 StringRef HelperName) 106 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 107 HasCancel), 108 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 109 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 110 } 111 112 /// \brief Get a variable or parameter for storing global thread id 113 /// inside OpenMP construct. 114 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 115 116 /// \brief Get the name of the capture helper. 117 StringRef getHelperName() const override { return HelperName; } 118 119 static bool classof(const CGCapturedStmtInfo *Info) { 120 return CGOpenMPRegionInfo::classof(Info) && 121 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 122 ParallelOutlinedRegion; 123 } 124 125 private: 126 /// \brief A variable or parameter storing global thread id for OpenMP 127 /// constructs. 128 const VarDecl *ThreadIDVar; 129 StringRef HelperName; 130 }; 131 132 /// \brief API for captured statement code generation in OpenMP constructs. 133 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 134 public: 135 class UntiedTaskActionTy final : public PrePostActionTy { 136 bool Untied; 137 const VarDecl *PartIDVar; 138 const RegionCodeGenTy UntiedCodeGen; 139 llvm::SwitchInst *UntiedSwitch = nullptr; 140 141 public: 142 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 143 const RegionCodeGenTy &UntiedCodeGen) 144 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 145 void Enter(CodeGenFunction &CGF) override { 146 if (Untied) { 147 // Emit task switching point. 
148 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 149 CGF.GetAddrOfLocalVar(PartIDVar), 150 PartIDVar->getType()->castAs<PointerType>()); 151 llvm::Value *Res = 152 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 153 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 154 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 155 CGF.EmitBlock(DoneBB); 156 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 157 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 158 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 159 CGF.Builder.GetInsertBlock()); 160 emitUntiedSwitch(CGF); 161 } 162 } 163 void emitUntiedSwitch(CodeGenFunction &CGF) const { 164 if (Untied) { 165 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 166 CGF.GetAddrOfLocalVar(PartIDVar), 167 PartIDVar->getType()->castAs<PointerType>()); 168 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 169 PartIdLVal); 170 UntiedCodeGen(CGF); 171 CodeGenFunction::JumpDest CurPoint = 172 CGF.getJumpDestInCurrentScope(".untied.next."); 173 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 174 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 175 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 176 CGF.Builder.GetInsertBlock()); 177 CGF.EmitBranchThroughCleanup(CurPoint); 178 CGF.EmitBlock(CurPoint.getBlock()); 179 } 180 } 181 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } 182 }; 183 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 184 const VarDecl *ThreadIDVar, 185 const RegionCodeGenTy &CodeGen, 186 OpenMPDirectiveKind Kind, bool HasCancel, 187 const UntiedTaskActionTy &Action) 188 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 189 ThreadIDVar(ThreadIDVar), Action(Action) { 190 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 191 } 192 193 /// \brief Get a variable or parameter for storing global thread id 194 /// inside OpenMP construct. 
195 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 196 197 /// \brief Get an LValue for the current ThreadID variable. 198 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 199 200 /// \brief Get the name of the capture helper. 201 StringRef getHelperName() const override { return ".omp_outlined."; } 202 203 void emitUntiedSwitch(CodeGenFunction &CGF) override { 204 Action.emitUntiedSwitch(CGF); 205 } 206 207 static bool classof(const CGCapturedStmtInfo *Info) { 208 return CGOpenMPRegionInfo::classof(Info) && 209 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 210 TaskOutlinedRegion; 211 } 212 213 private: 214 /// \brief A variable or parameter storing global thread id for OpenMP 215 /// constructs. 216 const VarDecl *ThreadIDVar; 217 /// Action for emitting code for untied tasks. 218 const UntiedTaskActionTy &Action; 219 }; 220 221 /// \brief API for inlined captured statement code generation in OpenMP 222 /// constructs. 223 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 224 public: 225 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 226 const RegionCodeGenTy &CodeGen, 227 OpenMPDirectiveKind Kind, bool HasCancel) 228 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 229 OldCSI(OldCSI), 230 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 231 232 // \brief Retrieve the value of the context parameter. 233 llvm::Value *getContextValue() const override { 234 if (OuterRegionInfo) 235 return OuterRegionInfo->getContextValue(); 236 llvm_unreachable("No context value for inlined OpenMP region"); 237 } 238 239 void setContextValue(llvm::Value *V) override { 240 if (OuterRegionInfo) { 241 OuterRegionInfo->setContextValue(V); 242 return; 243 } 244 llvm_unreachable("No context value for inlined OpenMP region"); 245 } 246 247 /// \brief Lookup the captured field decl for a variable. 
248 const FieldDecl *lookup(const VarDecl *VD) const override { 249 if (OuterRegionInfo) 250 return OuterRegionInfo->lookup(VD); 251 // If there is no outer outlined region,no need to lookup in a list of 252 // captured variables, we can use the original one. 253 return nullptr; 254 } 255 256 FieldDecl *getThisFieldDecl() const override { 257 if (OuterRegionInfo) 258 return OuterRegionInfo->getThisFieldDecl(); 259 return nullptr; 260 } 261 262 /// \brief Get a variable or parameter for storing global thread id 263 /// inside OpenMP construct. 264 const VarDecl *getThreadIDVariable() const override { 265 if (OuterRegionInfo) 266 return OuterRegionInfo->getThreadIDVariable(); 267 return nullptr; 268 } 269 270 /// \brief Get an LValue for the current ThreadID variable. 271 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 272 if (OuterRegionInfo) 273 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 274 llvm_unreachable("No LValue for inlined OpenMP construct"); 275 } 276 277 /// \brief Get the name of the capture helper. 278 StringRef getHelperName() const override { 279 if (auto *OuterRegionInfo = getOldCSI()) 280 return OuterRegionInfo->getHelperName(); 281 llvm_unreachable("No helper name for inlined OpenMP construct"); 282 } 283 284 void emitUntiedSwitch(CodeGenFunction &CGF) override { 285 if (OuterRegionInfo) 286 OuterRegionInfo->emitUntiedSwitch(CGF); 287 } 288 289 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 290 291 static bool classof(const CGCapturedStmtInfo *Info) { 292 return CGOpenMPRegionInfo::classof(Info) && 293 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 294 } 295 296 ~CGOpenMPInlinedRegionInfo() override = default; 297 298 private: 299 /// \brief CodeGen info about outer OpenMP region. 300 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 301 CGOpenMPRegionInfo *OuterRegionInfo; 302 }; 303 304 /// \brief API for captured statement code generation in OpenMP target 305 /// constructs. 
For this captures, implicit parameters are used instead of the 306 /// captured fields. The name of the target region has to be unique in a given 307 /// application so it is provided by the client, because only the client has 308 /// the information to generate that. 309 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 310 public: 311 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 312 const RegionCodeGenTy &CodeGen, StringRef HelperName) 313 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 314 /*HasCancel=*/false), 315 HelperName(HelperName) {} 316 317 /// \brief This is unused for target regions because each starts executing 318 /// with a single thread. 319 const VarDecl *getThreadIDVariable() const override { return nullptr; } 320 321 /// \brief Get the name of the capture helper. 322 StringRef getHelperName() const override { return HelperName; } 323 324 static bool classof(const CGCapturedStmtInfo *Info) { 325 return CGOpenMPRegionInfo::classof(Info) && 326 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 327 } 328 329 private: 330 StringRef HelperName; 331 }; 332 333 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 334 llvm_unreachable("No codegen for expressions"); 335 } 336 /// \brief API for generation of expressions captured in a innermost OpenMP 337 /// region. 338 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 339 public: 340 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 341 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 342 OMPD_unknown, 343 /*HasCancel=*/false), 344 PrivScope(CGF) { 345 // Make sure the globals captured in the provided statement are local by 346 // using the privatization logic. We assume the same variable is not 347 // captured more than once. 
348 for (const auto &C : CS.captures()) { 349 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 350 continue; 351 352 const VarDecl *VD = C.getCapturedVar(); 353 if (VD->isLocalVarDeclOrParm()) 354 continue; 355 356 DeclRefExpr DRE(const_cast<VarDecl *>(VD), 357 /*RefersToEnclosingVariableOrCapture=*/false, 358 VD->getType().getNonReferenceType(), VK_LValue, 359 C.getLocation()); 360 PrivScope.addPrivate( 361 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); }); 362 } 363 (void)PrivScope.Privatize(); 364 } 365 366 /// \brief Lookup the captured field decl for a variable. 367 const FieldDecl *lookup(const VarDecl *VD) const override { 368 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 369 return FD; 370 return nullptr; 371 } 372 373 /// \brief Emit the captured statement body. 374 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 375 llvm_unreachable("No body for expressions"); 376 } 377 378 /// \brief Get a variable or parameter for storing global thread id 379 /// inside OpenMP construct. 380 const VarDecl *getThreadIDVariable() const override { 381 llvm_unreachable("No thread id for expressions"); 382 } 383 384 /// \brief Get the name of the capture helper. 385 StringRef getHelperName() const override { 386 llvm_unreachable("No helper name for expressions"); 387 } 388 389 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 390 391 private: 392 /// Private scope to capture global variables. 393 CodeGenFunction::OMPPrivateScope PrivScope; 394 }; 395 396 /// \brief RAII for emitting code of OpenMP constructs. 397 class InlinedOpenMPRegionRAII { 398 CodeGenFunction &CGF; 399 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 400 FieldDecl *LambdaThisCaptureField = nullptr; 401 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 402 403 public: 404 /// \brief Constructs region for combined constructs. 405 /// \param CodeGen Code generation sequence for combined directives. 
Includes 406 /// a list of functions used for code generation of implicitly inlined 407 /// regions. 408 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 409 OpenMPDirectiveKind Kind, bool HasCancel) 410 : CGF(CGF) { 411 // Start emission for the construct. 412 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 413 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 414 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 415 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 416 CGF.LambdaThisCaptureField = nullptr; 417 BlockInfo = CGF.BlockInfo; 418 CGF.BlockInfo = nullptr; 419 } 420 421 ~InlinedOpenMPRegionRAII() { 422 // Restore original CapturedStmtInfo only if we're done with code emission. 423 auto *OldCSI = 424 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 425 delete CGF.CapturedStmtInfo; 426 CGF.CapturedStmtInfo = OldCSI; 427 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 428 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 429 CGF.BlockInfo = BlockInfo; 430 } 431 }; 432 433 /// \brief Values for bit flags used in the ident_t to describe the fields. 434 /// All enumeric elements are named and described in accordance with the code 435 /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h 436 enum OpenMPLocationFlags : unsigned { 437 /// \brief Use trampoline for internal microtask. 438 OMP_IDENT_IMD = 0x01, 439 /// \brief Use c-style ident structure. 440 OMP_IDENT_KMPC = 0x02, 441 /// \brief Atomic reduction option for kmpc_reduce. 442 OMP_ATOMIC_REDUCE = 0x10, 443 /// \brief Explicit 'barrier' directive. 444 OMP_IDENT_BARRIER_EXPL = 0x20, 445 /// \brief Implicit barrier in code. 446 OMP_IDENT_BARRIER_IMPL = 0x40, 447 /// \brief Implicit barrier in 'for' directive. 448 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 449 /// \brief Implicit barrier in 'sections' directive. 450 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 451 /// \brief Implicit barrier in 'single' directive. 
452 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 453 /// Call of __kmp_for_static_init for static loop. 454 OMP_IDENT_WORK_LOOP = 0x200, 455 /// Call of __kmp_for_static_init for sections. 456 OMP_IDENT_WORK_SECTIONS = 0x400, 457 /// Call of __kmp_for_static_init for distribute. 458 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 459 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 460 }; 461 462 /// \brief Describes ident structure that describes a source location. 463 /// All descriptions are taken from 464 /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h 465 /// Original structure: 466 /// typedef struct ident { 467 /// kmp_int32 reserved_1; /**< might be used in Fortran; 468 /// see above */ 469 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 470 /// KMP_IDENT_KMPC identifies this union 471 /// member */ 472 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 473 /// see above */ 474 ///#if USE_ITT_BUILD 475 /// /* but currently used for storing 476 /// region-specific ITT */ 477 /// /* contextual information. */ 478 ///#endif /* USE_ITT_BUILD */ 479 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 480 /// C++ */ 481 /// char const *psource; /**< String describing the source location. 482 /// The string is composed of semi-colon separated 483 // fields which describe the source file, 484 /// the function and a pair of line numbers that 485 /// delimit the construct. 486 /// */ 487 /// } ident_t; 488 enum IdentFieldIndex { 489 /// \brief might be used in Fortran 490 IdentField_Reserved_1, 491 /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 492 IdentField_Flags, 493 /// \brief Not really used in Fortran any more 494 IdentField_Reserved_2, 495 /// \brief Source[4] in Fortran, do not use for C++ 496 IdentField_Reserved_3, 497 /// \brief String describing the source location. 
The string is composed of 498 /// semi-colon separated fields which describe the source file, the function 499 /// and a pair of line numbers that delimit the construct. 500 IdentField_PSource 501 }; 502 503 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from 504 /// the enum sched_type in kmp.h). 505 enum OpenMPSchedType { 506 /// \brief Lower bound for default (unordered) versions. 507 OMP_sch_lower = 32, 508 OMP_sch_static_chunked = 33, 509 OMP_sch_static = 34, 510 OMP_sch_dynamic_chunked = 35, 511 OMP_sch_guided_chunked = 36, 512 OMP_sch_runtime = 37, 513 OMP_sch_auto = 38, 514 /// static with chunk adjustment (e.g., simd) 515 OMP_sch_static_balanced_chunked = 45, 516 /// \brief Lower bound for 'ordered' versions. 517 OMP_ord_lower = 64, 518 OMP_ord_static_chunked = 65, 519 OMP_ord_static = 66, 520 OMP_ord_dynamic_chunked = 67, 521 OMP_ord_guided_chunked = 68, 522 OMP_ord_runtime = 69, 523 OMP_ord_auto = 70, 524 OMP_sch_default = OMP_sch_static, 525 /// \brief dist_schedule types 526 OMP_dist_sch_static_chunked = 91, 527 OMP_dist_sch_static = 92, 528 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 529 /// Set if the monotonic schedule modifier was present. 530 OMP_sch_modifier_monotonic = (1 << 29), 531 /// Set if the nonmonotonic schedule modifier was present. 
532 OMP_sch_modifier_nonmonotonic = (1 << 30), 533 }; 534 535 enum OpenMPRTLFunction { 536 /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, 537 /// kmpc_micro microtask, ...); 538 OMPRTL__kmpc_fork_call, 539 /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc, 540 /// kmp_int32 global_tid, void *data, size_t size, void ***cache); 541 OMPRTL__kmpc_threadprivate_cached, 542 /// \brief Call to void __kmpc_threadprivate_register( ident_t *, 543 /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 544 OMPRTL__kmpc_threadprivate_register, 545 // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); 546 OMPRTL__kmpc_global_thread_num, 547 // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 548 // kmp_critical_name *crit); 549 OMPRTL__kmpc_critical, 550 // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 551 // global_tid, kmp_critical_name *crit, uintptr_t hint); 552 OMPRTL__kmpc_critical_with_hint, 553 // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 554 // kmp_critical_name *crit); 555 OMPRTL__kmpc_end_critical, 556 // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 557 // global_tid); 558 OMPRTL__kmpc_cancel_barrier, 559 // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 560 OMPRTL__kmpc_barrier, 561 // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 562 OMPRTL__kmpc_for_static_fini, 563 // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 564 // global_tid); 565 OMPRTL__kmpc_serialized_parallel, 566 // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 567 // global_tid); 568 OMPRTL__kmpc_end_serialized_parallel, 569 // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 570 // kmp_int32 num_threads); 571 OMPRTL__kmpc_push_num_threads, 572 // Call to void __kmpc_flush(ident_t *loc); 573 OMPRTL__kmpc_flush, 574 // Call to kmp_int32 __kmpc_master(ident_t 
*, kmp_int32 global_tid); 575 OMPRTL__kmpc_master, 576 // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); 577 OMPRTL__kmpc_end_master, 578 // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 579 // int end_part); 580 OMPRTL__kmpc_omp_taskyield, 581 // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); 582 OMPRTL__kmpc_single, 583 // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); 584 OMPRTL__kmpc_end_single, 585 // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 586 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 587 // kmp_routine_entry_t *task_entry); 588 OMPRTL__kmpc_omp_task_alloc, 589 // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * 590 // new_task); 591 OMPRTL__kmpc_omp_task, 592 // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 593 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 594 // kmp_int32 didit); 595 OMPRTL__kmpc_copyprivate, 596 // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 597 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 598 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 599 OMPRTL__kmpc_reduce, 600 // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 601 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 602 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 603 // *lck); 604 OMPRTL__kmpc_reduce_nowait, 605 // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 606 // kmp_critical_name *lck); 607 OMPRTL__kmpc_end_reduce, 608 // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 609 // kmp_critical_name *lck); 610 OMPRTL__kmpc_end_reduce_nowait, 611 // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 612 // kmp_task_t * new_task); 613 OMPRTL__kmpc_omp_task_begin_if0, 614 // Call to 
void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 615 // kmp_task_t * new_task); 616 OMPRTL__kmpc_omp_task_complete_if0, 617 // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 618 OMPRTL__kmpc_ordered, 619 // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 620 OMPRTL__kmpc_end_ordered, 621 // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 622 // global_tid); 623 OMPRTL__kmpc_omp_taskwait, 624 // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 625 OMPRTL__kmpc_taskgroup, 626 // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 627 OMPRTL__kmpc_end_taskgroup, 628 // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 629 // int proc_bind); 630 OMPRTL__kmpc_push_proc_bind, 631 // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 632 // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t 633 // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 634 OMPRTL__kmpc_omp_task_with_deps, 635 // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 636 // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 637 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 638 OMPRTL__kmpc_omp_wait_deps, 639 // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 640 // global_tid, kmp_int32 cncl_kind); 641 OMPRTL__kmpc_cancellationpoint, 642 // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 643 // kmp_int32 cncl_kind); 644 OMPRTL__kmpc_cancel, 645 // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 646 // kmp_int32 num_teams, kmp_int32 thread_limit); 647 OMPRTL__kmpc_push_num_teams, 648 // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 649 // microtask, ...); 650 OMPRTL__kmpc_fork_teams, 651 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 652 // if_val, kmp_uint64 *lb, kmp_uint64 
*ub, kmp_int64 st, int nogroup, int 653 // sched, kmp_uint64 grainsize, void *task_dup); 654 OMPRTL__kmpc_taskloop, 655 // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 656 // num_dims, struct kmp_dim *dims); 657 OMPRTL__kmpc_doacross_init, 658 // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 659 OMPRTL__kmpc_doacross_fini, 660 // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 661 // *vec); 662 OMPRTL__kmpc_doacross_post, 663 // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 664 // *vec); 665 OMPRTL__kmpc_doacross_wait, 666 // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void 667 // *data); 668 OMPRTL__kmpc_task_reduction_init, 669 // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 670 // *d); 671 OMPRTL__kmpc_task_reduction_get_th_data, 672 673 // 674 // Offloading related calls 675 // 676 // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 677 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 678 // *arg_types); 679 OMPRTL__tgt_target, 680 // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 681 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 682 // *arg_types); 683 OMPRTL__tgt_target_nowait, 684 // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 685 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 686 // *arg_types, int32_t num_teams, int32_t thread_limit); 687 OMPRTL__tgt_target_teams, 688 // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void 689 // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t 690 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 691 OMPRTL__tgt_target_teams_nowait, 692 // Call to void __tgt_register_lib(__tgt_bin_desc *desc); 693 OMPRTL__tgt_register_lib, 694 // Call to void 
__tgt_unregister_lib(__tgt_bin_desc *desc); 695 OMPRTL__tgt_unregister_lib, 696 // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 697 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 698 OMPRTL__tgt_target_data_begin, 699 // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 700 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 701 // *arg_types); 702 OMPRTL__tgt_target_data_begin_nowait, 703 // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 704 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 705 OMPRTL__tgt_target_data_end, 706 // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t 707 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 708 // *arg_types); 709 OMPRTL__tgt_target_data_end_nowait, 710 // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 711 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 712 OMPRTL__tgt_target_data_update, 713 // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t 714 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 715 // *arg_types); 716 OMPRTL__tgt_target_data_update_nowait, 717 }; 718 719 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 720 /// region. 
721 class CleanupTy final : public EHScopeStack::Cleanup { 722 PrePostActionTy *Action; 723 724 public: 725 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 726 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 727 if (!CGF.HaveInsertPoint()) 728 return; 729 Action->Exit(CGF); 730 } 731 }; 732 733 } // anonymous namespace 734 735 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 736 CodeGenFunction::RunCleanupsScope Scope(CGF); 737 if (PrePostAction) { 738 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 739 Callback(CodeGen, CGF, *PrePostAction); 740 } else { 741 PrePostActionTy Action; 742 Callback(CodeGen, CGF, Action); 743 } 744 } 745 746 /// Check if the combiner is a call to UDR combiner and if it is so return the 747 /// UDR decl used for reduction. 748 static const OMPDeclareReductionDecl * 749 getReductionInit(const Expr *ReductionOp) { 750 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 751 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 752 if (const auto *DRE = 753 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 754 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 755 return DRD; 756 return nullptr; 757 } 758 759 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 760 const OMPDeclareReductionDecl *DRD, 761 const Expr *InitOp, 762 Address Private, Address Original, 763 QualType Ty) { 764 if (DRD->getInitializer()) { 765 std::pair<llvm::Function *, llvm::Function *> Reduction = 766 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 767 const auto *CE = cast<CallExpr>(InitOp); 768 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 769 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 770 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 771 const auto *LHSDRE = 772 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 773 const auto *RHSDRE = 774 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 775 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 776 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 777 [=]() { return Private; }); 778 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 779 [=]() { return Original; }); 780 (void)PrivateScope.Privatize(); 781 RValue Func = RValue::get(Reduction.second); 782 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 783 CGF.EmitIgnoredExpr(InitOp); 784 } else { 785 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 786 auto *GV = new llvm::GlobalVariable( 787 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 788 llvm::GlobalValue::PrivateLinkage, Init, ".init"); 789 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 790 RValue InitRVal; 791 switch (CGF.getEvaluationKind(Ty)) { 792 case TEK_Scalar: 793 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 794 break; 795 case TEK_Complex: 796 InitRVal = 797 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 798 break; 799 case TEK_Aggregate: 800 InitRVal = RValue::getAggregate(LV.getAddress()); 801 break; 802 } 803 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 804 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 805 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 806 /*IsInitializer=*/false); 807 } 808 } 809 810 /// \brief Emit initialization of arrays of complex types. 811 /// \param DestAddr Address of the array. 812 /// \param Type Type of array. 813 /// \param Init Initial expression of array. 814 /// \param SrcAddr Address of the original array. 815 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 816 QualType Type, bool EmitDeclareReductionInit, 817 const Expr *Init, 818 const OMPDeclareReductionDecl *DRD, 819 Address SrcAddr = Address::invalid()) { 820 // Perform element-by-element initialization. 821 QualType ElementTy; 822 823 // Drill down to the base element type on both arrays. 
824 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 825 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 826 DestAddr = 827 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 828 if (DRD) 829 SrcAddr = 830 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 831 832 llvm::Value *SrcBegin = nullptr; 833 if (DRD) 834 SrcBegin = SrcAddr.getPointer(); 835 llvm::Value *DestBegin = DestAddr.getPointer(); 836 // Cast from pointer to array type to pointer to single element. 837 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 838 // The basic structure here is a while-do loop. 839 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 840 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 841 llvm::Value *IsEmpty = 842 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 843 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 844 845 // Enter the loop body, making that address the current address. 846 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 847 CGF.EmitBlock(BodyBB); 848 849 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 850 851 llvm::PHINode *SrcElementPHI = nullptr; 852 Address SrcElementCurrent = Address::invalid(); 853 if (DRD) { 854 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 855 "omp.arraycpy.srcElementPast"); 856 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 857 SrcElementCurrent = 858 Address(SrcElementPHI, 859 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 860 } 861 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 862 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 863 DestElementPHI->addIncoming(DestBegin, EntryBB); 864 Address DestElementCurrent = 865 Address(DestElementPHI, 866 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 867 868 // Emit copy. 
869 { 870 CodeGenFunction::RunCleanupsScope InitScope(CGF); 871 if (EmitDeclareReductionInit) { 872 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 873 SrcElementCurrent, ElementTy); 874 } else 875 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 876 /*IsInitializer=*/false); 877 } 878 879 if (DRD) { 880 // Shift the address forward by one element. 881 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 882 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 883 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 884 } 885 886 // Shift the address forward by one element. 887 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 888 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 889 // Check whether we've reached the end. 890 llvm::Value *Done = 891 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 892 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 893 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 894 895 // Done. 
896 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 897 } 898 899 static llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> 900 isDeclareTargetDeclaration(const ValueDecl *VD) { 901 if (const auto *MD = dyn_cast<CXXMethodDecl>(VD)) 902 if (!MD->isStatic()) 903 return llvm::None; 904 for (const Decl *D : VD->redecls()) { 905 if (!D->hasAttrs()) 906 continue; 907 if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>()) 908 return Attr->getMapType(); 909 } 910 return llvm::None; 911 } 912 913 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 914 return CGF.EmitOMPSharedLValue(E); 915 } 916 917 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 918 const Expr *E) { 919 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 920 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 921 return LValue(); 922 } 923 924 void ReductionCodeGen::emitAggregateInitialization( 925 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 926 const OMPDeclareReductionDecl *DRD) { 927 // Emit VarDecl with copy init for arrays. 928 // Get the address of the original variable captured in current 929 // captured region. 930 const auto *PrivateVD = 931 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 932 bool EmitDeclareReductionInit = 933 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 934 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 935 EmitDeclareReductionInit, 936 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 937 : PrivateVD->getInit(), 938 DRD, SharedLVal.getAddress()); 939 } 940 941 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 942 ArrayRef<const Expr *> Privates, 943 ArrayRef<const Expr *> ReductionOps) { 944 ClausesData.reserve(Shareds.size()); 945 SharedAddresses.reserve(Shareds.size()); 946 Sizes.reserve(Shareds.size()); 947 BaseDecls.reserve(Shareds.size()); 948 auto IPriv = Privates.begin(); 949 auto IRed = ReductionOps.begin(); 950 for (const Expr *Ref : Shareds) { 951 ClausesData.emplace_back(Ref, *IPriv, *IRed); 952 std::advance(IPriv, 1); 953 std::advance(IRed, 1); 954 } 955 } 956 957 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { 958 assert(SharedAddresses.size() == N && 959 "Number of generated lvalues must be exactly N."); 960 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 961 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 962 SharedAddresses.emplace_back(First, Second); 963 } 964 965 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 966 const auto *PrivateVD = 967 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 968 QualType PrivateType = PrivateVD->getType(); 969 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 970 if (!PrivateType->isVariablyModifiedType()) { 971 Sizes.emplace_back( 972 CGF.getTypeSize( 973 SharedAddresses[N].first.getType().getNonReferenceType()), 974 nullptr); 975 return; 976 } 977 llvm::Value *Size; 978 llvm::Value *SizeInChars; 979 auto *ElemType = 980 cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType()) 981 ->getElementType(); 982 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 983 if (AsArraySection) { 984 Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(), 985 SharedAddresses[N].first.getPointer()); 986 Size = CGF.Builder.CreateNUWAdd( 987 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 988 SizeInChars 
= CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 989 } else { 990 SizeInChars = CGF.getTypeSize( 991 SharedAddresses[N].first.getType().getNonReferenceType()); 992 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 993 } 994 Sizes.emplace_back(SizeInChars, Size); 995 CodeGenFunction::OpaqueValueMapping OpaqueMap( 996 CGF, 997 cast<OpaqueValueExpr>( 998 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 999 RValue::get(Size)); 1000 CGF.EmitVariablyModifiedType(PrivateType); 1001 } 1002 1003 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 1004 llvm::Value *Size) { 1005 const auto *PrivateVD = 1006 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1007 QualType PrivateType = PrivateVD->getType(); 1008 if (!PrivateType->isVariablyModifiedType()) { 1009 assert(!Size && !Sizes[N].second && 1010 "Size should be nullptr for non-variably modified reduction " 1011 "items."); 1012 return; 1013 } 1014 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1015 CGF, 1016 cast<OpaqueValueExpr>( 1017 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1018 RValue::get(Size)); 1019 CGF.EmitVariablyModifiedType(PrivateType); 1020 } 1021 1022 void ReductionCodeGen::emitInitialization( 1023 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 1024 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 1025 assert(SharedAddresses.size() > N && "No variable was generated"); 1026 const auto *PrivateVD = 1027 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1028 const OMPDeclareReductionDecl *DRD = 1029 getReductionInit(ClausesData[N].ReductionOp); 1030 QualType PrivateType = PrivateVD->getType(); 1031 PrivateAddr = CGF.Builder.CreateElementBitCast( 1032 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1033 QualType SharedType = SharedAddresses[N].first.getType(); 1034 SharedLVal = CGF.MakeAddrLValue( 1035 
CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(), 1036 CGF.ConvertTypeForMem(SharedType)), 1037 SharedType, SharedAddresses[N].first.getBaseInfo(), 1038 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 1039 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 1040 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 1041 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 1042 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 1043 PrivateAddr, SharedLVal.getAddress(), 1044 SharedLVal.getType()); 1045 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 1046 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 1047 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 1048 PrivateVD->getType().getQualifiers(), 1049 /*IsInitializer=*/false); 1050 } 1051 } 1052 1053 bool ReductionCodeGen::needCleanups(unsigned N) { 1054 const auto *PrivateVD = 1055 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1056 QualType PrivateType = PrivateVD->getType(); 1057 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1058 return DTorKind != QualType::DK_none; 1059 } 1060 1061 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 1062 Address PrivateAddr) { 1063 const auto *PrivateVD = 1064 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1065 QualType PrivateType = PrivateVD->getType(); 1066 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1067 if (needCleanups(N)) { 1068 PrivateAddr = CGF.Builder.CreateElementBitCast( 1069 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1070 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 1071 } 1072 } 1073 1074 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1075 LValue BaseLV) { 1076 BaseTy = BaseTy.getNonReferenceType(); 1077 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1078 
!CGF.getContext().hasSameType(BaseTy, ElTy)) { 1079 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 1080 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); 1081 } else { 1082 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy); 1083 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 1084 } 1085 BaseTy = BaseTy->getPointeeType(); 1086 } 1087 return CGF.MakeAddrLValue( 1088 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(), 1089 CGF.ConvertTypeForMem(ElTy)), 1090 BaseLV.getType(), BaseLV.getBaseInfo(), 1091 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 1092 } 1093 1094 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1095 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 1096 llvm::Value *Addr) { 1097 Address Tmp = Address::invalid(); 1098 Address TopTmp = Address::invalid(); 1099 Address MostTopTmp = Address::invalid(); 1100 BaseTy = BaseTy.getNonReferenceType(); 1101 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1102 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1103 Tmp = CGF.CreateMemTemp(BaseTy); 1104 if (TopTmp.isValid()) 1105 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 1106 else 1107 MostTopTmp = Tmp; 1108 TopTmp = Tmp; 1109 BaseTy = BaseTy->getPointeeType(); 1110 } 1111 llvm::Type *Ty = BaseLVType; 1112 if (Tmp.isValid()) 1113 Ty = Tmp.getElementType(); 1114 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 1115 if (Tmp.isValid()) { 1116 CGF.Builder.CreateStore(Addr, Tmp); 1117 return MostTopTmp; 1118 } 1119 return Address(Addr, BaseLVAlignment); 1120 } 1121 1122 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 1123 const VarDecl *OrigVD = nullptr; 1124 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 1125 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 1126 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 1127 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 1128 
while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1129 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1130 DE = cast<DeclRefExpr>(Base); 1131 OrigVD = cast<VarDecl>(DE->getDecl()); 1132 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 1133 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 1134 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1135 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1136 DE = cast<DeclRefExpr>(Base); 1137 OrigVD = cast<VarDecl>(DE->getDecl()); 1138 } 1139 return OrigVD; 1140 } 1141 1142 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1143 Address PrivateAddr) { 1144 const DeclRefExpr *DE; 1145 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1146 BaseDecls.emplace_back(OrigVD); 1147 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1148 LValue BaseLValue = 1149 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1150 OriginalBaseLValue); 1151 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1152 BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); 1153 llvm::Value *PrivatePointer = 1154 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1155 PrivateAddr.getPointer(), 1156 SharedAddresses[N].first.getAddress().getType()); 1157 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1158 return castToBase(CGF, OrigVD->getType(), 1159 SharedAddresses[N].first.getType(), 1160 OriginalBaseLValue.getAddress().getType(), 1161 OriginalBaseLValue.getAlignment(), Ptr); 1162 } 1163 BaseDecls.emplace_back( 1164 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1165 return PrivateAddr; 1166 } 1167 1168 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1169 const OMPDeclareReductionDecl *DRD = 1170 getReductionInit(ClausesData[N].ReductionOp); 1171 return DRD && DRD->getInitializer(); 1172 } 1173 1174 LValue 
CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1175 return CGF.EmitLoadOfPointerLValue( 1176 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1177 getThreadIDVariable()->getType()->castAs<PointerType>()); 1178 } 1179 1180 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1181 if (!CGF.HaveInsertPoint()) 1182 return; 1183 // 1.2.2 OpenMP Language Terminology 1184 // Structured block - An executable statement with a single entry at the 1185 // top and a single exit at the bottom. 1186 // The point of exit cannot be a branch out of the structured block. 1187 // longjmp() and throw() must not violate the entry/exit criteria. 1188 CGF.EHStack.pushTerminate(); 1189 CodeGen(CGF); 1190 CGF.EHStack.popTerminate(); 1191 } 1192 1193 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1194 CodeGenFunction &CGF) { 1195 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1196 getThreadIDVariable()->getType(), 1197 AlignmentSource::Decl); 1198 } 1199 1200 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1201 QualType FieldTy) { 1202 auto *Field = FieldDecl::Create( 1203 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1204 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1205 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1206 Field->setAccess(AS_public); 1207 DC->addDecl(Field); 1208 return Field; 1209 } 1210 1211 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) 1212 : CGM(CGM), OffloadEntriesInfoManager(CGM) { 1213 ASTContext &C = CGM.getContext(); 1214 RecordDecl *RD = C.buildImplicitRecord("ident_t"); 1215 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1216 RD->startDefinition(); 1217 // reserved_1 1218 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1219 // flags 1220 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1221 // reserved_2 1222 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1223 // reserved_3 1224 
addFieldToRecordDecl(C, RD, KmpInt32Ty); 1225 // psource 1226 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1227 RD->completeDefinition(); 1228 IdentQTy = C.getRecordType(RD); 1229 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); 1230 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1231 1232 loadOffloadInfoMetadata(); 1233 } 1234 1235 void CGOpenMPRuntime::clear() { 1236 InternalVars.clear(); 1237 } 1238 1239 static llvm::Function * 1240 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1241 const Expr *CombinerInitializer, const VarDecl *In, 1242 const VarDecl *Out, bool IsCombiner) { 1243 // void .omp_combiner.(Ty *in, Ty *out); 1244 ASTContext &C = CGM.getContext(); 1245 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1246 FunctionArgList Args; 1247 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1248 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1249 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1250 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1251 Args.push_back(&OmpOutParm); 1252 Args.push_back(&OmpInParm); 1253 const CGFunctionInfo &FnInfo = 1254 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1255 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1256 auto *Fn = llvm::Function::Create( 1257 FnTy, llvm::GlobalValue::InternalLinkage, 1258 IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule()); 1259 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1260 Fn->removeFnAttr(llvm::Attribute::NoInline); 1261 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1262 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1263 CodeGenFunction CGF(CGM); 1264 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1265 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 
1266 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1267 Out->getLocation()); 1268 CodeGenFunction::OMPPrivateScope Scope(CGF); 1269 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1270 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1271 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1272 .getAddress(); 1273 }); 1274 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1275 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1276 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1277 .getAddress(); 1278 }); 1279 (void)Scope.Privatize(); 1280 if (!IsCombiner && Out->hasInit() && 1281 !CGF.isTrivialInitializer(Out->getInit())) { 1282 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1283 Out->getType().getQualifiers(), 1284 /*IsInitializer=*/true); 1285 } 1286 if (CombinerInitializer) 1287 CGF.EmitIgnoredExpr(CombinerInitializer); 1288 Scope.ForceCleanup(); 1289 CGF.FinishFunction(); 1290 return Fn; 1291 } 1292 1293 void CGOpenMPRuntime::emitUserDefinedReduction( 1294 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1295 if (UDRMap.count(D) > 0) 1296 return; 1297 ASTContext &C = CGM.getContext(); 1298 if (!In || !Out) { 1299 In = &C.Idents.get("omp_in"); 1300 Out = &C.Idents.get("omp_out"); 1301 } 1302 llvm::Function *Combiner = emitCombinerOrInitializer( 1303 CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()), 1304 cast<VarDecl>(D->lookup(Out).front()), 1305 /*IsCombiner=*/true); 1306 llvm::Function *Initializer = nullptr; 1307 if (const Expr *Init = D->getInitializer()) { 1308 if (!Priv || !Orig) { 1309 Priv = &C.Idents.get("omp_priv"); 1310 Orig = &C.Idents.get("omp_orig"); 1311 } 1312 Initializer = emitCombinerOrInitializer( 1313 CGM, D->getType(), 1314 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? 
Init 1315 : nullptr, 1316 cast<VarDecl>(D->lookup(Orig).front()), 1317 cast<VarDecl>(D->lookup(Priv).front()), 1318 /*IsCombiner=*/false); 1319 } 1320 UDRMap.try_emplace(D, Combiner, Initializer); 1321 if (CGF) { 1322 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1323 Decls.second.push_back(D); 1324 } 1325 } 1326 1327 std::pair<llvm::Function *, llvm::Function *> 1328 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1329 auto I = UDRMap.find(D); 1330 if (I != UDRMap.end()) 1331 return I->second; 1332 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1333 return UDRMap.lookup(D); 1334 } 1335 1336 static llvm::Value *emitParallelOrTeamsOutlinedFunction( 1337 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1338 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1339 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1340 assert(ThreadIDVar->getType()->isPointerType() && 1341 "thread id variable must be of type kmp_int32 *"); 1342 CodeGenFunction CGF(CGM, true); 1343 bool HasCancel = false; 1344 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1345 HasCancel = OPD->hasCancel(); 1346 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1347 HasCancel = OPSD->hasCancel(); 1348 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1349 HasCancel = OPFD->hasCancel(); 1350 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1351 HasCancel = OPFD->hasCancel(); 1352 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1353 HasCancel = OPFD->hasCancel(); 1354 else if (const auto *OPFD = 1355 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1356 HasCancel = OPFD->hasCancel(); 1357 else if (const auto *OPFD = 1358 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1359 HasCancel = OPFD->hasCancel(); 1360 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1361 
HasCancel, OutlinedHelperName); 1362 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1363 return CGF.GenerateOpenMPCapturedStmtFunction(*CS); 1364 } 1365 1366 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( 1367 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1368 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1369 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1370 return emitParallelOrTeamsOutlinedFunction( 1371 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1372 } 1373 1374 llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1375 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1376 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1377 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1378 return emitParallelOrTeamsOutlinedFunction( 1379 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1380 } 1381 1382 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( 1383 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1384 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1385 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1386 bool Tied, unsigned &NumberOfParts) { 1387 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1388 PrePostActionTy &) { 1389 llvm::Value *ThreadID = getThreadID(CGF, D.getLocStart()); 1390 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getLocStart()); 1391 llvm::Value *TaskArgs[] = { 1392 UpLoc, ThreadID, 1393 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1394 TaskTVar->getType()->castAs<PointerType>()) 1395 .getPointer()}; 1396 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1397 }; 1398 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1399 UntiedCodeGen); 1400 CodeGen.setAction(Action); 1401 assert(!ThreadIDVar->getType()->isPointerType() && 1402 "thread id 
variable must be of type kmp_int32 for tasks"); 1403 const OpenMPDirectiveKind Region = 1404 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1405 : OMPD_task; 1406 const CapturedStmt *CS = D.getCapturedStmt(Region); 1407 const auto *TD = dyn_cast<OMPTaskDirective>(&D); 1408 CodeGenFunction CGF(CGM, true); 1409 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1410 InnermostKind, 1411 TD ? TD->hasCancel() : false, Action); 1412 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1413 llvm::Value *Res = CGF.GenerateCapturedStmtFunction(*CS); 1414 if (!Tied) 1415 NumberOfParts = Action.getNumberOfParts(); 1416 return Res; 1417 } 1418 1419 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1420 const RecordDecl *RD, const CGRecordLayout &RL, 1421 ArrayRef<llvm::Constant *> Data) { 1422 llvm::StructType *StructTy = RL.getLLVMType(); 1423 unsigned PrevIdx = 0; 1424 ConstantInitBuilder CIBuilder(CGM); 1425 auto DI = Data.begin(); 1426 for (const FieldDecl *FD : RD->fields()) { 1427 unsigned Idx = RL.getLLVMFieldNo(FD); 1428 // Fill the alignment. 1429 for (unsigned I = PrevIdx; I < Idx; ++I) 1430 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1431 PrevIdx = Idx + 1; 1432 Fields.add(*DI); 1433 ++DI; 1434 } 1435 } 1436 1437 template <class... As> 1438 static llvm::GlobalVariable * 1439 createConstantGlobalStruct(CodeGenModule &CGM, QualType Ty, 1440 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1441 As &&... 
Args) { 1442 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1443 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1444 ConstantInitBuilder CIBuilder(CGM); 1445 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1446 buildStructValue(Fields, CGM, RD, RL, Data); 1447 return Fields.finishAndCreateGlobal( 1448 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), 1449 /*isConstant=*/true, std::forward<As>(Args)...); 1450 } 1451 1452 template <typename T> 1453 void createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1454 ArrayRef<llvm::Constant *> Data, 1455 T &Parent) { 1456 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1457 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1458 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1459 buildStructValue(Fields, CGM, RD, RL, Data); 1460 Fields.finishAndAddTo(Parent); 1461 } 1462 1463 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1464 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1465 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); 1466 if (!Entry) { 1467 if (!DefaultOpenMPPSource) { 1468 // Initialize default location for psource field of ident_t structure of 1469 // all ident_t objects. Format is ";file;function;line;column;;". 
1470 // Taken from 1471 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c 1472 DefaultOpenMPPSource = 1473 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 1474 DefaultOpenMPPSource = 1475 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 1476 } 1477 1478 llvm::Constant *Data[] = {llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1479 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 1480 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1481 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1482 DefaultOpenMPPSource}; 1483 llvm::GlobalValue *DefaultOpenMPLocation = createConstantGlobalStruct( 1484 CGM, IdentQTy, Data, "", llvm::GlobalValue::PrivateLinkage); 1485 DefaultOpenMPLocation->setUnnamedAddr( 1486 llvm::GlobalValue::UnnamedAddr::Global); 1487 1488 OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation; 1489 } 1490 return Address(Entry, Align); 1491 } 1492 1493 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1494 SourceLocation Loc, 1495 unsigned Flags) { 1496 Flags |= OMP_IDENT_KMPC; 1497 // If no debug info is generated - return global default location. 1498 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1499 Loc.isInvalid()) 1500 return getOrCreateDefaultLocation(Flags).getPointer(); 1501 1502 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1503 1504 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1505 Address LocValue = Address::invalid(); 1506 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1507 if (I != OpenMPLocThreadIDMap.end()) 1508 LocValue = Address(I->second.DebugLoc, Align); 1509 1510 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 1511 // GetOpenMPThreadID was called before this routine. 
1512 if (!LocValue.isValid()) { 1513 // Generate "ident_t .kmpc_loc.addr;" 1514 Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr"); 1515 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1516 Elem.second.DebugLoc = AI.getPointer(); 1517 LocValue = AI; 1518 1519 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1520 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 1521 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 1522 CGF.getTypeSize(IdentQTy)); 1523 } 1524 1525 // char **psource = &.kmpc_loc_<flags>.addr.psource; 1526 LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy); 1527 auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin(); 1528 LValue PSource = 1529 CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource)); 1530 1531 llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 1532 if (OMPDebugLoc == nullptr) { 1533 SmallString<128> Buffer2; 1534 llvm::raw_svector_ostream OS2(Buffer2); 1535 // Build debug location 1536 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1537 OS2 << ";" << PLoc.getFilename() << ";"; 1538 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1539 OS2 << FD->getQualifiedNameAsString(); 1540 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1541 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 1542 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 1543 } 1544 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 1545 CGF.EmitStoreOfScalar(OMPDebugLoc, PSource); 1546 1547 // Our callers always pass this to a runtime function, so for 1548 // convenience, go ahead and return a naked pointer. 
1549 return LocValue.getPointer(); 1550 } 1551 1552 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1553 SourceLocation Loc) { 1554 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1555 1556 llvm::Value *ThreadID = nullptr; 1557 // Check whether we've already cached a load of the thread id in this 1558 // function. 1559 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1560 if (I != OpenMPLocThreadIDMap.end()) { 1561 ThreadID = I->second.ThreadID; 1562 if (ThreadID != nullptr) 1563 return ThreadID; 1564 } 1565 // If exceptions are enabled, do not use parameter to avoid possible crash. 1566 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1567 !CGF.getLangOpts().CXXExceptions || 1568 CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 1569 if (auto *OMPRegionInfo = 1570 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1571 if (OMPRegionInfo->getThreadIDVariable()) { 1572 // Check if this an outlined function with thread id passed as argument. 1573 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1574 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1575 // If value loaded in entry block, cache it and use it everywhere in 1576 // function. 1577 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 1578 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1579 Elem.second.ThreadID = ThreadID; 1580 } 1581 return ThreadID; 1582 } 1583 } 1584 } 1585 1586 // This is not an outlined function region - need to call __kmpc_int32 1587 // kmpc_global_thread_num(ident_t *loc). 1588 // Generate thread id value and cache this value for use across the 1589 // function. 
1590 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1591 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 1592 llvm::CallInst *Call = CGF.Builder.CreateCall( 1593 createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1594 emitUpdateLocation(CGF, Loc)); 1595 Call->setCallingConv(CGF.getRuntimeCC()); 1596 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1597 Elem.second.ThreadID = Call; 1598 return Call; 1599 } 1600 1601 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1602 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1603 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 1604 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1605 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1606 for(auto *D : FunctionUDRMap[CGF.CurFn]) 1607 UDRMap.erase(D); 1608 FunctionUDRMap.erase(CGF.CurFn); 1609 } 1610 } 1611 1612 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1613 return IdentTy->getPointerTo(); 1614 } 1615 1616 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1617 if (!Kmpc_MicroTy) { 1618 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1619 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1620 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1621 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1622 } 1623 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1624 } 1625 1626 llvm::Constant * 1627 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1628 llvm::Constant *RTLFn = nullptr; 1629 switch (static_cast<OpenMPRTLFunction>(Function)) { 1630 case OMPRTL__kmpc_fork_call: { 1631 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1632 // microtask, ...); 1633 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1634 getKmpc_MicroPointerTy()}; 1635 auto *FnTy = 1636 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1637 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1638 break; 1639 } 1640 case OMPRTL__kmpc_global_thread_num: { 1641 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1642 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1643 auto *FnTy = 1644 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1645 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1646 break; 1647 } 1648 case OMPRTL__kmpc_threadprivate_cached: { 1649 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1650 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1651 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1652 CGM.VoidPtrTy, CGM.SizeTy, 1653 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1654 auto *FnTy = 1655 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1656 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1657 break; 1658 } 1659 case OMPRTL__kmpc_critical: { 1660 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1661 // kmp_critical_name *crit); 1662 llvm::Type *TypeParams[] = { 1663 getIdentTyPointerTy(), CGM.Int32Ty, 1664 
llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1665 auto *FnTy = 1666 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1667 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1668 break; 1669 } 1670 case OMPRTL__kmpc_critical_with_hint: { 1671 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1672 // kmp_critical_name *crit, uintptr_t hint); 1673 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1674 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1675 CGM.IntPtrTy}; 1676 auto *FnTy = 1677 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1678 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); 1679 break; 1680 } 1681 case OMPRTL__kmpc_threadprivate_register: { 1682 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1683 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1684 // typedef void *(*kmpc_ctor)(void *); 1685 auto *KmpcCtorTy = 1686 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1687 /*isVarArg*/ false)->getPointerTo(); 1688 // typedef void *(*kmpc_cctor)(void *, void *); 1689 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1690 auto *KmpcCopyCtorTy = 1691 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1692 /*isVarArg*/ false) 1693 ->getPointerTo(); 1694 // typedef void (*kmpc_dtor)(void *); 1695 auto *KmpcDtorTy = 1696 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1697 ->getPointerTo(); 1698 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1699 KmpcCopyCtorTy, KmpcDtorTy}; 1700 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1701 /*isVarArg*/ false); 1702 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1703 break; 1704 } 1705 case OMPRTL__kmpc_end_critical: { 1706 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1707 // kmp_critical_name *crit); 1708 llvm::Type *TypeParams[] = { 
1709 getIdentTyPointerTy(), CGM.Int32Ty, 1710 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1711 auto *FnTy = 1712 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1713 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1714 break; 1715 } 1716 case OMPRTL__kmpc_cancel_barrier: { 1717 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1718 // global_tid); 1719 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1720 auto *FnTy = 1721 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1722 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1723 break; 1724 } 1725 case OMPRTL__kmpc_barrier: { 1726 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1727 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1728 auto *FnTy = 1729 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1730 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1731 break; 1732 } 1733 case OMPRTL__kmpc_for_static_fini: { 1734 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1735 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1736 auto *FnTy = 1737 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1738 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1739 break; 1740 } 1741 case OMPRTL__kmpc_push_num_threads: { 1742 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1743 // kmp_int32 num_threads) 1744 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1745 CGM.Int32Ty}; 1746 auto *FnTy = 1747 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1748 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1749 break; 1750 } 1751 case OMPRTL__kmpc_serialized_parallel: { 1752 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1753 // global_tid); 1754 llvm::Type *TypeParams[] = 
{getIdentTyPointerTy(), CGM.Int32Ty}; 1755 auto *FnTy = 1756 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1757 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1758 break; 1759 } 1760 case OMPRTL__kmpc_end_serialized_parallel: { 1761 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1762 // global_tid); 1763 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1764 auto *FnTy = 1765 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1766 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1767 break; 1768 } 1769 case OMPRTL__kmpc_flush: { 1770 // Build void __kmpc_flush(ident_t *loc); 1771 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1772 auto *FnTy = 1773 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1774 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1775 break; 1776 } 1777 case OMPRTL__kmpc_master: { 1778 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1779 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1780 auto *FnTy = 1781 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1782 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1783 break; 1784 } 1785 case OMPRTL__kmpc_end_master: { 1786 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1787 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1788 auto *FnTy = 1789 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1790 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1791 break; 1792 } 1793 case OMPRTL__kmpc_omp_taskyield: { 1794 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1795 // int end_part); 1796 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1797 auto *FnTy = 1798 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1799 RTLFn = 
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1800 break; 1801 } 1802 case OMPRTL__kmpc_single: { 1803 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1804 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1805 auto *FnTy = 1806 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1807 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1808 break; 1809 } 1810 case OMPRTL__kmpc_end_single: { 1811 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1812 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1813 auto *FnTy = 1814 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1815 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1816 break; 1817 } 1818 case OMPRTL__kmpc_omp_task_alloc: { 1819 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1820 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1821 // kmp_routine_entry_t *task_entry); 1822 assert(KmpRoutineEntryPtrTy != nullptr && 1823 "Type kmp_routine_entry_t must be created."); 1824 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1825 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1826 // Return void * and then cast to particular kmp_task_t type. 
1827 auto *FnTy = 1828 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1829 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1830 break; 1831 } 1832 case OMPRTL__kmpc_omp_task: { 1833 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1834 // *new_task); 1835 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1836 CGM.VoidPtrTy}; 1837 auto *FnTy = 1838 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1839 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 1840 break; 1841 } 1842 case OMPRTL__kmpc_copyprivate: { 1843 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 1844 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 1845 // kmp_int32 didit); 1846 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1847 auto *CpyFnTy = 1848 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 1849 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 1850 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 1851 CGM.Int32Ty}; 1852 auto *FnTy = 1853 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1854 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 1855 break; 1856 } 1857 case OMPRTL__kmpc_reduce: { 1858 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 1859 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 1860 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 1861 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1862 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1863 /*isVarArg=*/false); 1864 llvm::Type *TypeParams[] = { 1865 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1866 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1867 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1868 auto *FnTy = 1869 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1870 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 1871 break; 1872 } 1873 case OMPRTL__kmpc_reduce_nowait: { 1874 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 1875 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 1876 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 1877 // *lck); 1878 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1879 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1880 /*isVarArg=*/false); 1881 llvm::Type *TypeParams[] = { 1882 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1883 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1884 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1885 auto *FnTy = 1886 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1887 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 1888 break; 1889 } 1890 case OMPRTL__kmpc_end_reduce: { 1891 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 1892 // kmp_critical_name *lck); 1893 llvm::Type *TypeParams[] = { 1894 getIdentTyPointerTy(), CGM.Int32Ty, 1895 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1896 auto *FnTy = 1897 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1898 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 1899 break; 1900 } 1901 case OMPRTL__kmpc_end_reduce_nowait: { 1902 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 1903 // kmp_critical_name *lck); 1904 llvm::Type *TypeParams[] = { 1905 getIdentTyPointerTy(), CGM.Int32Ty, 1906 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1907 auto *FnTy = 1908 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1909 RTLFn = 1910 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 1911 break; 1912 } 1913 case OMPRTL__kmpc_omp_task_begin_if0: { 
1914 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1915 // *new_task); 1916 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1917 CGM.VoidPtrTy}; 1918 auto *FnTy = 1919 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1920 RTLFn = 1921 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 1922 break; 1923 } 1924 case OMPRTL__kmpc_omp_task_complete_if0: { 1925 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1926 // *new_task); 1927 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1928 CGM.VoidPtrTy}; 1929 auto *FnTy = 1930 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1931 RTLFn = CGM.CreateRuntimeFunction(FnTy, 1932 /*Name=*/"__kmpc_omp_task_complete_if0"); 1933 break; 1934 } 1935 case OMPRTL__kmpc_ordered: { 1936 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 1937 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1938 auto *FnTy = 1939 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1940 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 1941 break; 1942 } 1943 case OMPRTL__kmpc_end_ordered: { 1944 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 1945 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1946 auto *FnTy = 1947 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1948 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 1949 break; 1950 } 1951 case OMPRTL__kmpc_omp_taskwait: { 1952 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 1953 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1954 auto *FnTy = 1955 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1956 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 1957 break; 1958 } 1959 case OMPRTL__kmpc_taskgroup: { 1960 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 
global_tid); 1961 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1962 auto *FnTy = 1963 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1964 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 1965 break; 1966 } 1967 case OMPRTL__kmpc_end_taskgroup: { 1968 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 1969 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1970 auto *FnTy = 1971 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1972 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 1973 break; 1974 } 1975 case OMPRTL__kmpc_push_proc_bind: { 1976 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 1977 // int proc_bind) 1978 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1979 auto *FnTy = 1980 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1981 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 1982 break; 1983 } 1984 case OMPRTL__kmpc_omp_task_with_deps: { 1985 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 1986 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 1987 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 1988 llvm::Type *TypeParams[] = { 1989 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 1990 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 1991 auto *FnTy = 1992 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1993 RTLFn = 1994 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 1995 break; 1996 } 1997 case OMPRTL__kmpc_omp_wait_deps: { 1998 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 1999 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2000 // kmp_depend_info_t *noalias_dep_list); 2001 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2002 CGM.Int32Ty, CGM.VoidPtrTy, 2003 
CGM.Int32Ty, CGM.VoidPtrTy}; 2004 auto *FnTy = 2005 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2006 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2007 break; 2008 } 2009 case OMPRTL__kmpc_cancellationpoint: { 2010 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2011 // global_tid, kmp_int32 cncl_kind) 2012 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2013 auto *FnTy = 2014 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2015 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2016 break; 2017 } 2018 case OMPRTL__kmpc_cancel: { 2019 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2020 // kmp_int32 cncl_kind) 2021 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2022 auto *FnTy = 2023 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2024 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2025 break; 2026 } 2027 case OMPRTL__kmpc_push_num_teams: { 2028 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 2029 // kmp_int32 num_teams, kmp_int32 num_threads) 2030 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2031 CGM.Int32Ty}; 2032 auto *FnTy = 2033 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2034 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2035 break; 2036 } 2037 case OMPRTL__kmpc_fork_teams: { 2038 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2039 // microtask, ...); 2040 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2041 getKmpc_MicroPointerTy()}; 2042 auto *FnTy = 2043 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2044 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2045 break; 2046 } 2047 case OMPRTL__kmpc_taskloop: { 2048 // Build void __kmpc_taskloop(ident_t *loc, int gtid, 
kmp_task_t *task, int 2049 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 2050 // sched, kmp_uint64 grainsize, void *task_dup); 2051 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2052 CGM.IntTy, 2053 CGM.VoidPtrTy, 2054 CGM.IntTy, 2055 CGM.Int64Ty->getPointerTo(), 2056 CGM.Int64Ty->getPointerTo(), 2057 CGM.Int64Ty, 2058 CGM.IntTy, 2059 CGM.IntTy, 2060 CGM.Int64Ty, 2061 CGM.VoidPtrTy}; 2062 auto *FnTy = 2063 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2064 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 2065 break; 2066 } 2067 case OMPRTL__kmpc_doacross_init: { 2068 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 2069 // num_dims, struct kmp_dim *dims); 2070 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2071 CGM.Int32Ty, 2072 CGM.Int32Ty, 2073 CGM.VoidPtrTy}; 2074 auto *FnTy = 2075 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2076 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2077 break; 2078 } 2079 case OMPRTL__kmpc_doacross_fini: { 2080 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2081 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2082 auto *FnTy = 2083 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2084 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2085 break; 2086 } 2087 case OMPRTL__kmpc_doacross_post: { 2088 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2089 // *vec); 2090 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2091 CGM.Int64Ty->getPointerTo()}; 2092 auto *FnTy = 2093 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2094 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2095 break; 2096 } 2097 case OMPRTL__kmpc_doacross_wait: { 2098 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2099 // 
*vec); 2100 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2101 CGM.Int64Ty->getPointerTo()}; 2102 auto *FnTy = 2103 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2104 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2105 break; 2106 } 2107 case OMPRTL__kmpc_task_reduction_init: { 2108 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void 2109 // *data); 2110 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2111 auto *FnTy = 2112 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2113 RTLFn = 2114 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); 2115 break; 2116 } 2117 case OMPRTL__kmpc_task_reduction_get_th_data: { 2118 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2119 // *d); 2120 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2121 auto *FnTy = 2122 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2123 RTLFn = CGM.CreateRuntimeFunction( 2124 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2125 break; 2126 } 2127 case OMPRTL__tgt_target: { 2128 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2129 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2130 // *arg_types); 2131 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2132 CGM.VoidPtrTy, 2133 CGM.Int32Ty, 2134 CGM.VoidPtrPtrTy, 2135 CGM.VoidPtrPtrTy, 2136 CGM.SizeTy->getPointerTo(), 2137 CGM.Int64Ty->getPointerTo()}; 2138 auto *FnTy = 2139 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2140 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2141 break; 2142 } 2143 case OMPRTL__tgt_target_nowait: { 2144 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2145 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 2146 // int64_t *arg_types); 2147 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2148 
CGM.VoidPtrTy, 2149 CGM.Int32Ty, 2150 CGM.VoidPtrPtrTy, 2151 CGM.VoidPtrPtrTy, 2152 CGM.SizeTy->getPointerTo(), 2153 CGM.Int64Ty->getPointerTo()}; 2154 auto *FnTy = 2155 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2156 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2157 break; 2158 } 2159 case OMPRTL__tgt_target_teams: { 2160 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2161 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 2162 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2163 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2164 CGM.VoidPtrTy, 2165 CGM.Int32Ty, 2166 CGM.VoidPtrPtrTy, 2167 CGM.VoidPtrPtrTy, 2168 CGM.SizeTy->getPointerTo(), 2169 CGM.Int64Ty->getPointerTo(), 2170 CGM.Int32Ty, 2171 CGM.Int32Ty}; 2172 auto *FnTy = 2173 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2174 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2175 break; 2176 } 2177 case OMPRTL__tgt_target_teams_nowait: { 2178 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2179 // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t 2180 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2181 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2182 CGM.VoidPtrTy, 2183 CGM.Int32Ty, 2184 CGM.VoidPtrPtrTy, 2185 CGM.VoidPtrPtrTy, 2186 CGM.SizeTy->getPointerTo(), 2187 CGM.Int64Ty->getPointerTo(), 2188 CGM.Int32Ty, 2189 CGM.Int32Ty}; 2190 auto *FnTy = 2191 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2192 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2193 break; 2194 } 2195 case OMPRTL__tgt_register_lib: { 2196 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 2197 QualType ParamTy = 2198 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2199 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2200 auto *FnTy = 2201 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2202 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 2203 break; 2204 } 2205 case OMPRTL__tgt_unregister_lib: { 2206 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 2207 QualType ParamTy = 2208 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2209 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2210 auto *FnTy = 2211 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2212 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 2213 break; 2214 } 2215 case OMPRTL__tgt_target_data_begin: { 2216 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2217 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2218 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2219 CGM.Int32Ty, 2220 CGM.VoidPtrPtrTy, 2221 CGM.VoidPtrPtrTy, 2222 CGM.SizeTy->getPointerTo(), 2223 CGM.Int64Ty->getPointerTo()}; 2224 auto *FnTy = 2225 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2226 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2227 break; 2228 } 2229 case OMPRTL__tgt_target_data_begin_nowait: { 2230 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2231 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2232 // *arg_types); 2233 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2234 CGM.Int32Ty, 2235 CGM.VoidPtrPtrTy, 2236 CGM.VoidPtrPtrTy, 2237 CGM.SizeTy->getPointerTo(), 2238 CGM.Int64Ty->getPointerTo()}; 2239 auto *FnTy = 2240 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2241 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2242 break; 2243 } 2244 case OMPRTL__tgt_target_data_end: { 2245 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2246 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2247 llvm::Type *TypeParams[] = 
{CGM.Int64Ty, 2248 CGM.Int32Ty, 2249 CGM.VoidPtrPtrTy, 2250 CGM.VoidPtrPtrTy, 2251 CGM.SizeTy->getPointerTo(), 2252 CGM.Int64Ty->getPointerTo()}; 2253 auto *FnTy = 2254 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2255 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2256 break; 2257 } 2258 case OMPRTL__tgt_target_data_end_nowait: { 2259 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2260 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2261 // *arg_types); 2262 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2263 CGM.Int32Ty, 2264 CGM.VoidPtrPtrTy, 2265 CGM.VoidPtrPtrTy, 2266 CGM.SizeTy->getPointerTo(), 2267 CGM.Int64Ty->getPointerTo()}; 2268 auto *FnTy = 2269 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2270 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2271 break; 2272 } 2273 case OMPRTL__tgt_target_data_update: { 2274 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2275 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2276 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2277 CGM.Int32Ty, 2278 CGM.VoidPtrPtrTy, 2279 CGM.VoidPtrPtrTy, 2280 CGM.SizeTy->getPointerTo(), 2281 CGM.Int64Ty->getPointerTo()}; 2282 auto *FnTy = 2283 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2284 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2285 break; 2286 } 2287 case OMPRTL__tgt_target_data_update_nowait: { 2288 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2289 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2290 // *arg_types); 2291 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2292 CGM.Int32Ty, 2293 CGM.VoidPtrPtrTy, 2294 CGM.VoidPtrPtrTy, 2295 CGM.SizeTy->getPointerTo(), 2296 CGM.Int64Ty->getPointerTo()}; 2297 auto *FnTy = 2298 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2299 RTLFn = 
CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2300 break; 2301 } 2302 } 2303 assert(RTLFn && "Unable to find OpenMP runtime function"); 2304 return RTLFn; 2305 } 2306 2307 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, 2308 bool IVSigned) { 2309 assert((IVSize == 32 || IVSize == 64) && 2310 "IV size is not compatible with the omp runtime"); 2311 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2312 : "__kmpc_for_static_init_4u") 2313 : (IVSigned ? "__kmpc_for_static_init_8" 2314 : "__kmpc_for_static_init_8u"); 2315 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2316 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2317 llvm::Type *TypeParams[] = { 2318 getIdentTyPointerTy(), // loc 2319 CGM.Int32Ty, // tid 2320 CGM.Int32Ty, // schedtype 2321 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2322 PtrTy, // p_lower 2323 PtrTy, // p_upper 2324 PtrTy, // p_stride 2325 ITy, // incr 2326 ITy // chunk 2327 }; 2328 auto *FnTy = 2329 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2330 return CGM.CreateRuntimeFunction(FnTy, Name); 2331 } 2332 2333 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, 2334 bool IVSigned) { 2335 assert((IVSize == 32 || IVSize == 64) && 2336 "IV size is not compatible with the omp runtime"); 2337 StringRef Name = 2338 IVSize == 32 2339 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2340 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2341 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2342 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2343 CGM.Int32Ty, // tid 2344 CGM.Int32Ty, // schedtype 2345 ITy, // lower 2346 ITy, // upper 2347 ITy, // stride 2348 ITy // chunk 2349 }; 2350 auto *FnTy = 2351 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2352 return CGM.CreateRuntimeFunction(FnTy, Name); 2353 } 2354 2355 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, 2356 bool IVSigned) { 2357 assert((IVSize == 32 || IVSize == 64) && 2358 "IV size is not compatible with the omp runtime"); 2359 StringRef Name = 2360 IVSize == 32 2361 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2362 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2363 llvm::Type *TypeParams[] = { 2364 getIdentTyPointerTy(), // loc 2365 CGM.Int32Ty, // tid 2366 }; 2367 auto *FnTy = 2368 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2369 return CGM.CreateRuntimeFunction(FnTy, Name); 2370 } 2371 2372 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, 2373 bool IVSigned) { 2374 assert((IVSize == 32 || IVSize == 64) && 2375 "IV size is not compatible with the omp runtime"); 2376 StringRef Name = 2377 IVSize == 32 2378 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2379 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2380 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2381 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2382 llvm::Type *TypeParams[] = { 2383 getIdentTyPointerTy(), // loc 2384 CGM.Int32Ty, // tid 2385 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2386 PtrTy, // p_lower 2387 PtrTy, // p_upper 2388 PtrTy // p_stride 2389 }; 2390 auto *FnTy = 2391 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2392 return CGM.CreateRuntimeFunction(FnTy, Name); 2393 } 2394 2395 Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) { 2396 if (CGM.getLangOpts().OpenMPSimd) 2397 return Address::invalid(); 2398 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2399 isDeclareTargetDeclaration(VD); 2400 if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) { 2401 SmallString<64> PtrName; 2402 { 2403 llvm::raw_svector_ostream OS(PtrName); 2404 OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr"; 2405 } 2406 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2407 if (!Ptr) { 2408 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2409 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2410 PtrName); 2411 if (!CGM.getLangOpts().OpenMPIsDevice) { 2412 auto *GV = cast<llvm::GlobalVariable>(Ptr); 2413 GV->setLinkage(llvm::GlobalValue::ExternalLinkage); 2414 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 2415 } 2416 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr)); 2417 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 2418 } 2419 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 2420 } 2421 return Address::invalid(); 2422 } 2423 2424 llvm::Constant * 2425 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2426 assert(!CGM.getLangOpts().OpenMPUseTLS || 2427 !CGM.getContext().getTargetInfo().isTLSSupported()); 2428 // Lookup the entry, lazily creating it if necessary. 
2429 return getOrCreateInternalVariable(CGM.Int8PtrPtrTy, 2430 Twine(CGM.getMangledName(VD), ".cache.")); 2431 } 2432 2433 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2434 const VarDecl *VD, 2435 Address VDAddr, 2436 SourceLocation Loc) { 2437 if (CGM.getLangOpts().OpenMPUseTLS && 2438 CGM.getContext().getTargetInfo().isTLSSupported()) 2439 return VDAddr; 2440 2441 llvm::Type *VarTy = VDAddr.getElementType(); 2442 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2443 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2444 CGM.Int8PtrTy), 2445 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2446 getOrCreateThreadPrivateCache(VD)}; 2447 return Address(CGF.EmitRuntimeCall( 2448 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2449 VDAddr.getAlignment()); 2450 } 2451 2452 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2453 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2454 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2455 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2456 // library. 2457 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 2458 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2459 OMPLoc); 2460 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2461 // to register constructor/destructor for variable. 
2462 llvm::Value *Args[] = { 2463 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 2464 Ctor, CopyCtor, Dtor}; 2465 CGF.EmitRuntimeCall( 2466 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 2467 } 2468 2469 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 2470 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 2471 bool PerformInit, CodeGenFunction *CGF) { 2472 if (CGM.getLangOpts().OpenMPUseTLS && 2473 CGM.getContext().getTargetInfo().isTLSSupported()) 2474 return nullptr; 2475 2476 VD = VD->getDefinition(CGM.getContext()); 2477 if (VD && ThreadPrivateWithDefinition.count(VD) == 0) { 2478 ThreadPrivateWithDefinition.insert(VD); 2479 QualType ASTTy = VD->getType(); 2480 2481 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 2482 const Expr *Init = VD->getAnyInitializer(); 2483 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2484 // Generate function that re-emits the declaration's initializer into the 2485 // threadprivate copy of the variable VD 2486 CodeGenFunction CtorCGF(CGM); 2487 FunctionArgList Args; 2488 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2489 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2490 ImplicitParamDecl::Other); 2491 Args.push_back(&Dst); 2492 2493 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2494 CGM.getContext().VoidPtrTy, Args); 2495 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2496 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2497 FTy, ".__kmpc_global_ctor_.", FI, Loc); 2498 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 2499 Args, Loc, Loc); 2500 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 2501 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2502 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2503 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 2504 Arg = CtorCGF.Builder.CreateElementBitCast( 2505 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 2506 
CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 2507 /*IsInitializer=*/true); 2508 ArgVal = CtorCGF.EmitLoadOfScalar( 2509 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2510 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2511 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 2512 CtorCGF.FinishFunction(); 2513 Ctor = Fn; 2514 } 2515 if (VD->getType().isDestructedType() != QualType::DK_none) { 2516 // Generate function that emits destructor call for the threadprivate copy 2517 // of the variable VD 2518 CodeGenFunction DtorCGF(CGM); 2519 FunctionArgList Args; 2520 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2521 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2522 ImplicitParamDecl::Other); 2523 Args.push_back(&Dst); 2524 2525 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2526 CGM.getContext().VoidTy, Args); 2527 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2528 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2529 FTy, ".__kmpc_global_dtor_.", FI, Loc); 2530 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2531 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 2532 Loc, Loc); 2533 // Create a scope with an artificial location for the body of this function. 2534 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2535 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 2536 DtorCGF.GetAddrOfLocalVar(&Dst), 2537 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 2538 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 2539 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2540 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2541 DtorCGF.FinishFunction(); 2542 Dtor = Fn; 2543 } 2544 // Do not emit init function if it is not required. 
2545 if (!Ctor && !Dtor) 2546 return nullptr; 2547 2548 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2549 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 2550 /*isVarArg=*/false) 2551 ->getPointerTo(); 2552 // Copying constructor for the threadprivate variable. 2553 // Must be NULL - reserved by runtime, but currently it requires that this 2554 // parameter is always NULL. Otherwise it fires assertion. 2555 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 2556 if (Ctor == nullptr) { 2557 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 2558 /*isVarArg=*/false) 2559 ->getPointerTo(); 2560 Ctor = llvm::Constant::getNullValue(CtorTy); 2561 } 2562 if (Dtor == nullptr) { 2563 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 2564 /*isVarArg=*/false) 2565 ->getPointerTo(); 2566 Dtor = llvm::Constant::getNullValue(DtorTy); 2567 } 2568 if (!CGF) { 2569 auto *InitFunctionTy = 2570 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 2571 llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction( 2572 InitFunctionTy, ".__omp_threadprivate_init_.", 2573 CGM.getTypes().arrangeNullaryFunction()); 2574 CodeGenFunction InitCGF(CGM); 2575 FunctionArgList ArgList; 2576 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 2577 CGM.getTypes().arrangeNullaryFunction(), ArgList, 2578 Loc, Loc); 2579 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2580 InitCGF.FinishFunction(); 2581 return InitFunction; 2582 } 2583 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2584 } 2585 return nullptr; 2586 } 2587 2588 /// \brief Obtain information that uniquely identifies a target entry. This 2589 /// consists of the file and device IDs as well as line number associated with 2590 /// the relevant entry source location. 
2591 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 2592 unsigned &DeviceID, unsigned &FileID, 2593 unsigned &LineNum) { 2594 SourceManager &SM = C.getSourceManager(); 2595 2596 // The loc should be always valid and have a file ID (the user cannot use 2597 // #pragma directives in macros) 2598 2599 assert(Loc.isValid() && "Source location is expected to be always valid."); 2600 2601 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 2602 assert(PLoc.isValid() && "Source location is expected to be always valid."); 2603 2604 llvm::sys::fs::UniqueID ID; 2605 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 2606 SM.getDiagnostics().Report(diag::err_cannot_open_file) 2607 << PLoc.getFilename() << EC.message(); 2608 2609 DeviceID = ID.getDevice(); 2610 FileID = ID.getFile(); 2611 LineNum = PLoc.getLine(); 2612 } 2613 2614 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 2615 llvm::GlobalVariable *Addr, 2616 bool PerformInit) { 2617 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2618 isDeclareTargetDeclaration(VD); 2619 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) 2620 return false; 2621 VD = VD->getDefinition(CGM.getContext()); 2622 if (VD && !DeclareTargetWithDefinition.insert(VD).second) 2623 return CGM.getLangOpts().OpenMPIsDevice; 2624 2625 QualType ASTTy = VD->getType(); 2626 2627 SourceLocation Loc = VD->getCanonicalDecl()->getLocStart(); 2628 // Produce the unique prefix to identify the new target regions. We use 2629 // the source location of the variable declaration which we know to not 2630 // conflict with any target region. 
2631 unsigned DeviceID; 2632 unsigned FileID; 2633 unsigned Line; 2634 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 2635 SmallString<128> Buffer, Out; 2636 { 2637 llvm::raw_svector_ostream OS(Buffer); 2638 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 2639 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 2640 } 2641 2642 const Expr *Init = VD->getAnyInitializer(); 2643 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2644 llvm::Constant *Ctor; 2645 llvm::Constant *ID; 2646 if (CGM.getLangOpts().OpenMPIsDevice) { 2647 // Generate function that re-emits the declaration's initializer into 2648 // the threadprivate copy of the variable VD 2649 CodeGenFunction CtorCGF(CGM); 2650 2651 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2652 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2653 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2654 FTy, Twine(Buffer, "_ctor"), FI, Loc); 2655 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 2656 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2657 FunctionArgList(), Loc, Loc); 2658 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 2659 CtorCGF.EmitAnyExprToMem(Init, 2660 Address(Addr, CGM.getContext().getDeclAlign(VD)), 2661 Init->getType().getQualifiers(), 2662 /*IsInitializer=*/true); 2663 CtorCGF.FinishFunction(); 2664 Ctor = Fn; 2665 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2666 } else { 2667 Ctor = new llvm::GlobalVariable( 2668 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2669 llvm::GlobalValue::PrivateLinkage, 2670 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 2671 ID = Ctor; 2672 } 2673 2674 // Register the information for the entry associated with the constructor. 
2675 Out.clear(); 2676 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2677 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 2678 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 2679 } 2680 if (VD->getType().isDestructedType() != QualType::DK_none) { 2681 llvm::Constant *Dtor; 2682 llvm::Constant *ID; 2683 if (CGM.getLangOpts().OpenMPIsDevice) { 2684 // Generate function that emits destructor call for the threadprivate 2685 // copy of the variable VD 2686 CodeGenFunction DtorCGF(CGM); 2687 2688 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2689 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2690 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2691 FTy, Twine(Buffer, "_dtor"), FI, Loc); 2692 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2693 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2694 FunctionArgList(), Loc, Loc); 2695 // Create a scope with an artificial location for the body of this 2696 // function. 2697 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2698 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 2699 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2700 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2701 DtorCGF.FinishFunction(); 2702 Dtor = Fn; 2703 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2704 } else { 2705 Dtor = new llvm::GlobalVariable( 2706 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2707 llvm::GlobalValue::PrivateLinkage, 2708 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 2709 ID = Dtor; 2710 } 2711 // Register the information for the entry associated with the destructor. 
2712 Out.clear(); 2713 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2714 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2715 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2716 } 2717 return CGM.getLangOpts().OpenMPIsDevice; 2718 } 2719 2720 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2721 QualType VarType, 2722 StringRef Name) { 2723 llvm::Twine VarName(Name, ".artificial."); 2724 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2725 llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName); 2726 llvm::Value *Args[] = { 2727 emitUpdateLocation(CGF, SourceLocation()), 2728 getThreadID(CGF, SourceLocation()), 2729 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2730 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2731 /*IsSigned=*/false), 2732 getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")}; 2733 return Address( 2734 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2735 CGF.EmitRuntimeCall( 2736 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2737 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2738 CGM.getPointerAlign()); 2739 } 2740 2741 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen 2742 /// function. Here is the logic: 2743 /// if (Cond) { 2744 /// ThenGen(); 2745 /// } else { 2746 /// ElseGen(); 2747 /// } 2748 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 2749 const RegionCodeGenTy &ThenGen, 2750 const RegionCodeGenTy &ElseGen) { 2751 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2752 2753 // If the condition constant folds and can be elided, try to avoid emitting 2754 // the condition and the dead arm of the if/else. 
2755 bool CondConstant; 2756 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2757 if (CondConstant) 2758 ThenGen(CGF); 2759 else 2760 ElseGen(CGF); 2761 return; 2762 } 2763 2764 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2765 // emit the conditional branch. 2766 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2767 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2768 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2769 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2770 2771 // Emit the 'then' code. 2772 CGF.EmitBlock(ThenBlock); 2773 ThenGen(CGF); 2774 CGF.EmitBranch(ContBlock); 2775 // Emit the 'else' code if present. 2776 // There is no need to emit line number for unconditional branch. 2777 (void)ApplyDebugLocation::CreateEmpty(CGF); 2778 CGF.EmitBlock(ElseBlock); 2779 ElseGen(CGF); 2780 // There is no need to emit line number for unconditional branch. 2781 (void)ApplyDebugLocation::CreateEmpty(CGF); 2782 CGF.EmitBranch(ContBlock); 2783 // Emit the continuation block for code after the if. 
2784 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2785 } 2786 2787 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2788 llvm::Value *OutlinedFn, 2789 ArrayRef<llvm::Value *> CapturedVars, 2790 const Expr *IfCond) { 2791 if (!CGF.HaveInsertPoint()) 2792 return; 2793 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2794 auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, 2795 PrePostActionTy &) { 2796 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2797 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2798 llvm::Value *Args[] = { 2799 RTLoc, 2800 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2801 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2802 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2803 RealArgs.append(std::begin(Args), std::end(Args)); 2804 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2805 2806 llvm::Value *RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); 2807 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2808 }; 2809 auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, 2810 PrePostActionTy &) { 2811 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2812 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2813 // Build calls: 2814 // __kmpc_serialized_parallel(&Loc, GTid); 2815 llvm::Value *Args[] = {RTLoc, ThreadID}; 2816 CGF.EmitRuntimeCall( 2817 RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); 2818 2819 // OutlinedFn(>id, &zero, CapturedStruct); 2820 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 2821 Address ZeroAddr = 2822 CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), 2823 /*Name*/ ".zero.addr"); 2824 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 2825 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 2826 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 2827 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 2828 
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2829 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2830 2831 // __kmpc_end_serialized_parallel(&Loc, GTid); 2832 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2833 CGF.EmitRuntimeCall( 2834 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 2835 EndArgs); 2836 }; 2837 if (IfCond) { 2838 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 2839 } else { 2840 RegionCodeGenTy ThenRCG(ThenGen); 2841 ThenRCG(CGF); 2842 } 2843 } 2844 2845 // If we're inside an (outlined) parallel region, use the region info's 2846 // thread-ID variable (it is passed in a first argument of the outlined function 2847 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2848 // regular serial code region, get thread ID by calling kmp_int32 2849 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2850 // return the address of that temp. 2851 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2852 SourceLocation Loc) { 2853 if (auto *OMPRegionInfo = 2854 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2855 if (OMPRegionInfo->getThreadIDVariable()) 2856 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 2857 2858 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2859 QualType Int32Ty = 2860 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2861 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2862 CGF.EmitStoreOfScalar(ThreadID, 2863 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2864 2865 return ThreadIDTemp; 2866 } 2867 2868 llvm::Constant * 2869 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, 2870 const llvm::Twine &Name) { 2871 SmallString<256> Buffer; 2872 llvm::raw_svector_ostream Out(Buffer); 2873 Out << Name; 2874 StringRef RuntimeName = Out.str(); 2875 auto &Elem = *InternalVars.try_emplace(RuntimeName, 
nullptr).first; 2876 if (Elem.second) { 2877 assert(Elem.second->getType()->getPointerElementType() == Ty && 2878 "OMP internal variable has different type than requested"); 2879 return &*Elem.second; 2880 } 2881 2882 return Elem.second = new llvm::GlobalVariable( 2883 CGM.getModule(), Ty, /*IsConstant*/ false, 2884 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2885 Elem.first()); 2886 } 2887 2888 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2889 llvm::Twine Name(".gomp_critical_user_", CriticalName); 2890 return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); 2891 } 2892 2893 namespace { 2894 /// Common pre(post)-action for different OpenMP constructs. 2895 class CommonActionTy final : public PrePostActionTy { 2896 llvm::Value *EnterCallee; 2897 ArrayRef<llvm::Value *> EnterArgs; 2898 llvm::Value *ExitCallee; 2899 ArrayRef<llvm::Value *> ExitArgs; 2900 bool Conditional; 2901 llvm::BasicBlock *ContBlock = nullptr; 2902 2903 public: 2904 CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs, 2905 llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs, 2906 bool Conditional = false) 2907 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2908 ExitArgs(ExitArgs), Conditional(Conditional) {} 2909 void Enter(CodeGenFunction &CGF) override { 2910 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2911 if (Conditional) { 2912 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2913 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2914 ContBlock = CGF.createBasicBlock("omp_if.end"); 2915 // Generate the branch (If-stmt) 2916 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2917 CGF.EmitBlock(ThenBlock); 2918 } 2919 } 2920 void Done(CodeGenFunction &CGF) { 2921 // Emit the rest of blocks/branches 2922 CGF.EmitBranch(ContBlock); 2923 CGF.EmitBlock(ContBlock, true); 2924 } 2925 void Exit(CodeGenFunction &CGF) 
override { 2926 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2927 } 2928 }; 2929 } // anonymous namespace 2930 2931 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2932 StringRef CriticalName, 2933 const RegionCodeGenTy &CriticalOpGen, 2934 SourceLocation Loc, const Expr *Hint) { 2935 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2936 // CriticalOpGen(); 2937 // __kmpc_end_critical(ident_t *, gtid, Lock); 2938 // Prepare arguments and build a call to __kmpc_critical 2939 if (!CGF.HaveInsertPoint()) 2940 return; 2941 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2942 getCriticalRegionLock(CriticalName)}; 2943 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2944 std::end(Args)); 2945 if (Hint) { 2946 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2947 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); 2948 } 2949 CommonActionTy Action( 2950 createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint 2951 : OMPRTL__kmpc_critical), 2952 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); 2953 CriticalOpGen.setAction(Action); 2954 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2955 } 2956 2957 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2958 const RegionCodeGenTy &MasterOpGen, 2959 SourceLocation Loc) { 2960 if (!CGF.HaveInsertPoint()) 2961 return; 2962 // if(__kmpc_master(ident_t *, gtid)) { 2963 // MasterOpGen(); 2964 // __kmpc_end_master(ident_t *, gtid); 2965 // } 2966 // Prepare arguments and build a call to __kmpc_master 2967 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2968 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, 2969 createRuntimeFunction(OMPRTL__kmpc_end_master), Args, 2970 /*Conditional=*/true); 2971 MasterOpGen.setAction(Action); 2972 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2973 Action.Done(CGF); 2974 } 2975 2976 void 
CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2977 SourceLocation Loc) { 2978 if (!CGF.HaveInsertPoint()) 2979 return; 2980 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2981 llvm::Value *Args[] = { 2982 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2983 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2984 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 2985 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2986 Region->emitUntiedSwitch(CGF); 2987 } 2988 2989 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2990 const RegionCodeGenTy &TaskgroupOpGen, 2991 SourceLocation Loc) { 2992 if (!CGF.HaveInsertPoint()) 2993 return; 2994 // __kmpc_taskgroup(ident_t *, gtid); 2995 // TaskgroupOpGen(); 2996 // __kmpc_end_taskgroup(ident_t *, gtid); 2997 // Prepare arguments and build a call to __kmpc_taskgroup 2998 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2999 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, 3000 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 3001 Args); 3002 TaskgroupOpGen.setAction(Action); 3003 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 3004 } 3005 3006 /// Given an array of pointers to variables, project the address of a 3007 /// given variable. 3008 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 3009 unsigned Index, const VarDecl *Var) { 3010 // Pull out the pointer to the variable. 
3011 Address PtrAddr = 3012 CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize()); 3013 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 3014 3015 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 3016 Addr = CGF.Builder.CreateElementBitCast( 3017 Addr, CGF.ConvertTypeForMem(Var->getType())); 3018 return Addr; 3019 } 3020 3021 static llvm::Value *emitCopyprivateCopyFunction( 3022 CodeGenModule &CGM, llvm::Type *ArgsType, 3023 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 3024 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 3025 SourceLocation Loc) { 3026 ASTContext &C = CGM.getContext(); 3027 // void copy_func(void *LHSArg, void *RHSArg); 3028 FunctionArgList Args; 3029 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3030 ImplicitParamDecl::Other); 3031 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3032 ImplicitParamDecl::Other); 3033 Args.push_back(&LHSArg); 3034 Args.push_back(&RHSArg); 3035 const auto &CGFI = 3036 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3037 auto *Fn = llvm::Function::Create( 3038 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 3039 ".omp.copyprivate.copy_func", &CGM.getModule()); 3040 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 3041 Fn->setDoesNotRecurse(); 3042 CodeGenFunction CGF(CGM); 3043 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 3044 // Dest = (void*[n])(LHSArg); 3045 // Src = (void*[n])(RHSArg); 3046 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3047 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 3048 ArgsType), CGF.getPointerAlign()); 3049 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3050 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 3051 ArgsType), CGF.getPointerAlign()); 3052 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 3053 // *(Type1*)Dst[1] = 
*(Type1*)Src[1]; 3054 // ... 3055 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 3056 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 3057 const auto *DestVar = 3058 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 3059 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 3060 3061 const auto *SrcVar = 3062 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 3063 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 3064 3065 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 3066 QualType Type = VD->getType(); 3067 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 3068 } 3069 CGF.FinishFunction(); 3070 return Fn; 3071 } 3072 3073 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 3074 const RegionCodeGenTy &SingleOpGen, 3075 SourceLocation Loc, 3076 ArrayRef<const Expr *> CopyprivateVars, 3077 ArrayRef<const Expr *> SrcExprs, 3078 ArrayRef<const Expr *> DstExprs, 3079 ArrayRef<const Expr *> AssignmentOps) { 3080 if (!CGF.HaveInsertPoint()) 3081 return; 3082 assert(CopyprivateVars.size() == SrcExprs.size() && 3083 CopyprivateVars.size() == DstExprs.size() && 3084 CopyprivateVars.size() == AssignmentOps.size()); 3085 ASTContext &C = CGM.getContext(); 3086 // int32 did_it = 0; 3087 // if(__kmpc_single(ident_t *, gtid)) { 3088 // SingleOpGen(); 3089 // __kmpc_end_single(ident_t *, gtid); 3090 // did_it = 1; 3091 // } 3092 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3093 // <copy_func>, did_it); 3094 3095 Address DidIt = Address::invalid(); 3096 if (!CopyprivateVars.empty()) { 3097 // int32 did_it = 0; 3098 QualType KmpInt32Ty = 3099 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 3100 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 3101 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 3102 } 3103 // Prepare arguments and build a call to __kmpc_single 3104 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), 
getThreadID(CGF, Loc)}; 3105 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, 3106 createRuntimeFunction(OMPRTL__kmpc_end_single), Args, 3107 /*Conditional=*/true); 3108 SingleOpGen.setAction(Action); 3109 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 3110 if (DidIt.isValid()) { 3111 // did_it = 1; 3112 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 3113 } 3114 Action.Done(CGF); 3115 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3116 // <copy_func>, did_it); 3117 if (DidIt.isValid()) { 3118 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 3119 QualType CopyprivateArrayTy = 3120 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 3121 /*IndexTypeQuals=*/0); 3122 // Create a list of all private variables for copyprivate. 3123 Address CopyprivateList = 3124 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 3125 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 3126 Address Elem = CGF.Builder.CreateConstArrayGEP( 3127 CopyprivateList, I, CGF.getPointerSize()); 3128 CGF.Builder.CreateStore( 3129 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3130 CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), 3131 Elem); 3132 } 3133 // Build function that copies private values from single region to all other 3134 // threads in the corresponding parallel region. 
3135 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 3136 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 3137 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 3138 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 3139 Address CL = 3140 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 3141 CGF.VoidPtrTy); 3142 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 3143 llvm::Value *Args[] = { 3144 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 3145 getThreadID(CGF, Loc), // i32 <gtid> 3146 BufSize, // size_t <buf_size> 3147 CL.getPointer(), // void *<copyprivate list> 3148 CpyFn, // void (*) (void *, void *) <copy_func> 3149 DidItVal // i32 did_it 3150 }; 3151 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 3152 } 3153 } 3154 3155 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 3156 const RegionCodeGenTy &OrderedOpGen, 3157 SourceLocation Loc, bool IsThreads) { 3158 if (!CGF.HaveInsertPoint()) 3159 return; 3160 // __kmpc_ordered(ident_t *, gtid); 3161 // OrderedOpGen(); 3162 // __kmpc_end_ordered(ident_t *, gtid); 3163 // Prepare arguments and build a call to __kmpc_ordered 3164 if (IsThreads) { 3165 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3166 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, 3167 createRuntimeFunction(OMPRTL__kmpc_end_ordered), 3168 Args); 3169 OrderedOpGen.setAction(Action); 3170 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3171 return; 3172 } 3173 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3174 } 3175 3176 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 3177 OpenMPDirectiveKind Kind, bool EmitChecks, 3178 bool ForceSimpleCall) { 3179 if (!CGF.HaveInsertPoint()) 3180 return; 3181 // Build call __kmpc_cancel_barrier(loc, thread_id); 3182 // Build call __kmpc_barrier(loc, thread_id); 3183 unsigned Flags; 3184 if (Kind == OMPD_for) 3185 
Flags = OMP_IDENT_BARRIER_IMPL_FOR; 3186 else if (Kind == OMPD_sections) 3187 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 3188 else if (Kind == OMPD_single) 3189 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 3190 else if (Kind == OMPD_barrier) 3191 Flags = OMP_IDENT_BARRIER_EXPL; 3192 else 3193 Flags = OMP_IDENT_BARRIER_IMPL; 3194 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 3195 // thread_id); 3196 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 3197 getThreadID(CGF, Loc)}; 3198 if (auto *OMPRegionInfo = 3199 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 3200 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 3201 llvm::Value *Result = CGF.EmitRuntimeCall( 3202 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 3203 if (EmitChecks) { 3204 // if (__kmpc_cancel_barrier()) { 3205 // exit from construct; 3206 // } 3207 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 3208 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 3209 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 3210 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 3211 CGF.EmitBlock(ExitBB); 3212 // exit from construct; 3213 CodeGenFunction::JumpDest CancelDestination = 3214 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 3215 CGF.EmitBranchThroughCleanup(CancelDestination); 3216 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 3217 } 3218 return; 3219 } 3220 } 3221 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 3222 } 3223 3224 /// \brief Map the OpenMP loop schedule to the runtime enumeration. 3225 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 3226 bool Chunked, bool Ordered) { 3227 switch (ScheduleKind) { 3228 case OMPC_SCHEDULE_static: 3229 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 3230 : (Ordered ? OMP_ord_static : OMP_sch_static); 3231 case OMPC_SCHEDULE_dynamic: 3232 return Ordered ? 
OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 3233 case OMPC_SCHEDULE_guided: 3234 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 3235 case OMPC_SCHEDULE_runtime: 3236 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 3237 case OMPC_SCHEDULE_auto: 3238 return Ordered ? OMP_ord_auto : OMP_sch_auto; 3239 case OMPC_SCHEDULE_unknown: 3240 assert(!Chunked && "chunk was specified but schedule kind not known"); 3241 return Ordered ? OMP_ord_static : OMP_sch_static; 3242 } 3243 llvm_unreachable("Unexpected runtime schedule"); 3244 } 3245 3246 /// \brief Map the OpenMP distribute schedule to the runtime enumeration. 3247 static OpenMPSchedType 3248 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 3249 // only static is allowed for dist_schedule 3250 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 3251 } 3252 3253 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 3254 bool Chunked) const { 3255 OpenMPSchedType Schedule = 3256 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3257 return Schedule == OMP_sch_static; 3258 } 3259 3260 bool CGOpenMPRuntime::isStaticNonchunked( 3261 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3262 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3263 return Schedule == OMP_dist_sch_static; 3264 } 3265 3266 3267 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 3268 OpenMPSchedType Schedule = 3269 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 3270 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 3271 return Schedule != OMP_sch_static; 3272 } 3273 3274 static int addMonoNonMonoModifier(OpenMPSchedType Schedule, 3275 OpenMPScheduleClauseModifier M1, 3276 OpenMPScheduleClauseModifier M2) { 3277 int Modifier = 0; 3278 switch (M1) { 3279 case OMPC_SCHEDULE_MODIFIER_monotonic: 3280 Modifier = OMP_sch_modifier_monotonic; 
3281 break; 3282 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3283 Modifier = OMP_sch_modifier_nonmonotonic; 3284 break; 3285 case OMPC_SCHEDULE_MODIFIER_simd: 3286 if (Schedule == OMP_sch_static_chunked) 3287 Schedule = OMP_sch_static_balanced_chunked; 3288 break; 3289 case OMPC_SCHEDULE_MODIFIER_last: 3290 case OMPC_SCHEDULE_MODIFIER_unknown: 3291 break; 3292 } 3293 switch (M2) { 3294 case OMPC_SCHEDULE_MODIFIER_monotonic: 3295 Modifier = OMP_sch_modifier_monotonic; 3296 break; 3297 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3298 Modifier = OMP_sch_modifier_nonmonotonic; 3299 break; 3300 case OMPC_SCHEDULE_MODIFIER_simd: 3301 if (Schedule == OMP_sch_static_chunked) 3302 Schedule = OMP_sch_static_balanced_chunked; 3303 break; 3304 case OMPC_SCHEDULE_MODIFIER_last: 3305 case OMPC_SCHEDULE_MODIFIER_unknown: 3306 break; 3307 } 3308 return Schedule | Modifier; 3309 } 3310 3311 void CGOpenMPRuntime::emitForDispatchInit( 3312 CodeGenFunction &CGF, SourceLocation Loc, 3313 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 3314 bool Ordered, const DispatchRTInput &DispatchValues) { 3315 if (!CGF.HaveInsertPoint()) 3316 return; 3317 OpenMPSchedType Schedule = getRuntimeSchedule( 3318 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 3319 assert(Ordered || 3320 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 3321 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 3322 Schedule != OMP_sch_static_balanced_chunked)); 3323 // Call __kmpc_dispatch_init( 3324 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 3325 // kmp_int[32|64] lower, kmp_int[32|64] upper, 3326 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 3327 3328 // If the Chunk was not specified in the clause - use default value 1. 3329 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 3330 : CGF.Builder.getIntN(IVSize, 1); 3331 llvm::Value *Args[] = { 3332 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3333 CGF.Builder.getInt32(addMonoNonMonoModifier( 3334 Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 3335 DispatchValues.LB, // Lower 3336 DispatchValues.UB, // Upper 3337 CGF.Builder.getIntN(IVSize, 1), // Stride 3338 Chunk // Chunk 3339 }; 3340 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 3341 } 3342 3343 static void emitForStaticInitCall( 3344 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 3345 llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule, 3346 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 3347 const CGOpenMPRuntime::StaticRTInput &Values) { 3348 if (!CGF.HaveInsertPoint()) 3349 return; 3350 3351 assert(!Values.Ordered); 3352 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 3353 Schedule == OMP_sch_static_balanced_chunked || 3354 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 3355 Schedule == OMP_dist_sch_static || 3356 Schedule == OMP_dist_sch_static_chunked); 3357 3358 // Call __kmpc_for_static_init( 3359 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 3360 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 3361 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 3362 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 3363 llvm::Value *Chunk = Values.Chunk; 3364 if (Chunk == nullptr) { 3365 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 3366 Schedule == OMP_dist_sch_static) && 3367 "expected static non-chunked schedule"); 3368 // If the Chunk was not specified in the clause - use default value 1. 
3369 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 3370 } else { 3371 assert((Schedule == OMP_sch_static_chunked || 3372 Schedule == OMP_sch_static_balanced_chunked || 3373 Schedule == OMP_ord_static_chunked || 3374 Schedule == OMP_dist_sch_static_chunked) && 3375 "expected static chunked schedule"); 3376 } 3377 llvm::Value *Args[] = { 3378 UpdateLocation, 3379 ThreadId, 3380 CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1, 3381 M2)), // Schedule type 3382 Values.IL.getPointer(), // &isLastIter 3383 Values.LB.getPointer(), // &LB 3384 Values.UB.getPointer(), // &UB 3385 Values.ST.getPointer(), // &Stride 3386 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 3387 Chunk // Chunk 3388 }; 3389 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 3390 } 3391 3392 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 3393 SourceLocation Loc, 3394 OpenMPDirectiveKind DKind, 3395 const OpenMPScheduleTy &ScheduleKind, 3396 const StaticRTInput &Values) { 3397 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 3398 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 3399 assert(isOpenMPWorksharingDirective(DKind) && 3400 "Expected loop-based or sections-based directive."); 3401 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 3402 isOpenMPLoopDirective(DKind) 3403 ? 
OMP_IDENT_WORK_LOOP 3404 : OMP_IDENT_WORK_SECTIONS); 3405 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3406 llvm::Constant *StaticInitFunction = 3407 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3408 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3409 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 3410 } 3411 3412 void CGOpenMPRuntime::emitDistributeStaticInit( 3413 CodeGenFunction &CGF, SourceLocation Loc, 3414 OpenMPDistScheduleClauseKind SchedKind, 3415 const CGOpenMPRuntime::StaticRTInput &Values) { 3416 OpenMPSchedType ScheduleNum = 3417 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 3418 llvm::Value *UpdatedLocation = 3419 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 3420 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3421 llvm::Constant *StaticInitFunction = 3422 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3423 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3424 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 3425 OMPC_SCHEDULE_MODIFIER_unknown, Values); 3426 } 3427 3428 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 3429 SourceLocation Loc, 3430 OpenMPDirectiveKind DKind) { 3431 if (!CGF.HaveInsertPoint()) 3432 return; 3433 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 3434 llvm::Value *Args[] = { 3435 emitUpdateLocation(CGF, Loc, 3436 isOpenMPDistributeDirective(DKind) 3437 ? OMP_IDENT_WORK_DISTRIBUTE 3438 : isOpenMPLoopDirective(DKind) 3439 ? 
OMP_IDENT_WORK_LOOP 3440 : OMP_IDENT_WORK_SECTIONS), 3441 getThreadID(CGF, Loc)}; 3442 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 3443 Args); 3444 } 3445 3446 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 3447 SourceLocation Loc, 3448 unsigned IVSize, 3449 bool IVSigned) { 3450 if (!CGF.HaveInsertPoint()) 3451 return; 3452 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 3453 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3454 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 3455 } 3456 3457 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 3458 SourceLocation Loc, unsigned IVSize, 3459 bool IVSigned, Address IL, 3460 Address LB, Address UB, 3461 Address ST) { 3462 // Call __kmpc_dispatch_next( 3463 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 3464 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 3465 // kmp_int[32|64] *p_stride); 3466 llvm::Value *Args[] = { 3467 emitUpdateLocation(CGF, Loc), 3468 getThreadID(CGF, Loc), 3469 IL.getPointer(), // &isLastIter 3470 LB.getPointer(), // &Lower 3471 UB.getPointer(), // &Upper 3472 ST.getPointer() // &Stride 3473 }; 3474 llvm::Value *Call = 3475 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 3476 return CGF.EmitScalarConversion( 3477 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 3478 CGF.getContext().BoolTy, Loc); 3479 } 3480 3481 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 3482 llvm::Value *NumThreads, 3483 SourceLocation Loc) { 3484 if (!CGF.HaveInsertPoint()) 3485 return; 3486 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 3487 llvm::Value *Args[] = { 3488 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3489 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 3490 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 3491 Args); 3492 
} 3493 3494 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 3495 OpenMPProcBindClauseKind ProcBind, 3496 SourceLocation Loc) { 3497 if (!CGF.HaveInsertPoint()) 3498 return; 3499 // Constants for proc bind value accepted by the runtime. 3500 enum ProcBindTy { 3501 ProcBindFalse = 0, 3502 ProcBindTrue, 3503 ProcBindMaster, 3504 ProcBindClose, 3505 ProcBindSpread, 3506 ProcBindIntel, 3507 ProcBindDefault 3508 } RuntimeProcBind; 3509 switch (ProcBind) { 3510 case OMPC_PROC_BIND_master: 3511 RuntimeProcBind = ProcBindMaster; 3512 break; 3513 case OMPC_PROC_BIND_close: 3514 RuntimeProcBind = ProcBindClose; 3515 break; 3516 case OMPC_PROC_BIND_spread: 3517 RuntimeProcBind = ProcBindSpread; 3518 break; 3519 case OMPC_PROC_BIND_unknown: 3520 llvm_unreachable("Unsupported proc_bind value."); 3521 } 3522 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 3523 llvm::Value *Args[] = { 3524 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3525 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 3526 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 3527 } 3528 3529 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 3530 SourceLocation Loc) { 3531 if (!CGF.HaveInsertPoint()) 3532 return; 3533 // Build call void __kmpc_flush(ident_t *loc) 3534 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 3535 emitUpdateLocation(CGF, Loc)); 3536 } 3537 3538 namespace { 3539 /// \brief Indexes of fields for type kmp_task_t. 3540 enum KmpTaskTFields { 3541 /// \brief List of shared variables. 3542 KmpTaskTShareds, 3543 /// \brief Task routine. 3544 KmpTaskTRoutine, 3545 /// \brief Partition id for the untied tasks. 3546 KmpTaskTPartId, 3547 /// Function with call of destructors for private variables. 3548 Data1, 3549 /// Task priority. 3550 Data2, 3551 /// (Taskloops only) Lower bound. 3552 KmpTaskTLowerBound, 3553 /// (Taskloops only) Upper bound. 
3554 KmpTaskTUpperBound, 3555 /// (Taskloops only) Stride. 3556 KmpTaskTStride, 3557 /// (Taskloops only) Is last iteration flag. 3558 KmpTaskTLastIter, 3559 /// (Taskloops only) Reduction data. 3560 KmpTaskTReductions, 3561 }; 3562 } // anonymous namespace 3563 3564 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 3565 return OffloadEntriesTargetRegion.empty() && 3566 OffloadEntriesDeviceGlobalVar.empty(); 3567 } 3568 3569 /// \brief Initialize target region entry. 3570 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3571 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3572 StringRef ParentName, unsigned LineNum, 3573 unsigned Order) { 3574 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3575 "only required for the device " 3576 "code generation."); 3577 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3578 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3579 OMPTargetRegionEntryTargetRegion); 3580 ++OffloadingEntriesNum; 3581 } 3582 3583 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3584 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3585 StringRef ParentName, unsigned LineNum, 3586 llvm::Constant *Addr, llvm::Constant *ID, 3587 OMPTargetRegionEntryKind Flags) { 3588 // If we are emitting code for a target, the entry is already initialized, 3589 // only has to be registered. 
3590 if (CGM.getLangOpts().OpenMPIsDevice) { 3591 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { 3592 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3593 DiagnosticsEngine::Error, 3594 "Unable to find target region on line '%0' in the device code."); 3595 CGM.getDiags().Report(DiagID) << LineNum; 3596 return; 3597 } 3598 auto &Entry = 3599 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3600 assert(Entry.isValid() && "Entry not initialized!"); 3601 Entry.setAddress(Addr); 3602 Entry.setID(ID); 3603 Entry.setFlags(Flags); 3604 } else { 3605 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3606 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3607 ++OffloadingEntriesNum; 3608 } 3609 } 3610 3611 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3612 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3613 unsigned LineNum) const { 3614 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3615 if (PerDevice == OffloadEntriesTargetRegion.end()) 3616 return false; 3617 auto PerFile = PerDevice->second.find(FileID); 3618 if (PerFile == PerDevice->second.end()) 3619 return false; 3620 auto PerParentName = PerFile->second.find(ParentName); 3621 if (PerParentName == PerFile->second.end()) 3622 return false; 3623 auto PerLine = PerParentName->second.find(LineNum); 3624 if (PerLine == PerParentName->second.end()) 3625 return false; 3626 // Fail if this entry is already registered. 3627 if (PerLine->second.getAddress() || PerLine->second.getID()) 3628 return false; 3629 return true; 3630 } 3631 3632 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3633 const OffloadTargetRegionEntryInfoActTy &Action) { 3634 // Scan all target region entries and perform the provided action. 
3635 for (const auto &D : OffloadEntriesTargetRegion) 3636 for (const auto &F : D.second) 3637 for (const auto &P : F.second) 3638 for (const auto &L : P.second) 3639 Action(D.first, F.first, P.first(), L.first, L.second); 3640 } 3641 3642 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3643 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3644 OMPTargetGlobalVarEntryKind Flags, 3645 unsigned Order) { 3646 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3647 "only required for the device " 3648 "code generation."); 3649 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3650 ++OffloadingEntriesNum; 3651 } 3652 3653 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3654 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3655 CharUnits VarSize, 3656 OMPTargetGlobalVarEntryKind Flags, 3657 llvm::GlobalValue::LinkageTypes Linkage) { 3658 if (CGM.getLangOpts().OpenMPIsDevice) { 3659 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3660 assert(Entry.isValid() && Entry.getFlags() == Flags && 3661 "Entry not initialized!"); 3662 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3663 "Resetting with the new address."); 3664 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) 3665 return; 3666 Entry.setAddress(Addr); 3667 Entry.setVarSize(VarSize); 3668 Entry.setLinkage(Linkage); 3669 } else { 3670 if (hasDeviceGlobalVarEntryInfo(VarName)) 3671 return; 3672 OffloadEntriesDeviceGlobalVar.try_emplace( 3673 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3674 ++OffloadingEntriesNum; 3675 } 3676 } 3677 3678 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3679 actOnDeviceGlobalVarEntriesInfo( 3680 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3681 // Scan all target region entries and perform the provided action. 
3682 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3683 Action(E.getKey(), E.getValue()); 3684 } 3685 3686 llvm::Function * 3687 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { 3688 // If we don't have entries or if we are emitting code for the device, we 3689 // don't need to do anything. 3690 if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) 3691 return nullptr; 3692 3693 llvm::Module &M = CGM.getModule(); 3694 ASTContext &C = CGM.getContext(); 3695 3696 // Get list of devices we care about 3697 const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples; 3698 3699 // We should be creating an offloading descriptor only if there are devices 3700 // specified. 3701 assert(!Devices.empty() && "No OpenMP offloading devices??"); 3702 3703 // Create the external variables that will point to the begin and end of the 3704 // host entries section. These will be defined by the linker. 3705 llvm::Type *OffloadEntryTy = 3706 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); 3707 auto *HostEntriesBegin = new llvm::GlobalVariable( 3708 M, OffloadEntryTy, /*isConstant=*/true, 3709 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 3710 ".omp_offloading.entries_begin"); 3711 auto *HostEntriesEnd = new llvm::GlobalVariable( 3712 M, OffloadEntryTy, /*isConstant=*/true, 3713 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 3714 ".omp_offloading.entries_end"); 3715 3716 // Create all device images 3717 auto *DeviceImageTy = cast<llvm::StructType>( 3718 CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); 3719 ConstantInitBuilder DeviceImagesBuilder(CGM); 3720 ConstantArrayBuilder DeviceImagesEntries = 3721 DeviceImagesBuilder.beginArray(DeviceImageTy); 3722 3723 for (const llvm::Triple &Device : Devices) { 3724 StringRef T = Device.getTriple(); 3725 auto *ImgBegin = new llvm::GlobalVariable( 3726 M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, 3727 
/*Initializer=*/nullptr, Twine(".omp_offloading.img_start.", T)); 3728 auto *ImgEnd = new llvm::GlobalVariable( 3729 M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, 3730 /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.", T)); 3731 3732 llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin, 3733 HostEntriesEnd}; 3734 createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data, 3735 DeviceImagesEntries); 3736 } 3737 3738 // Create device images global array. 3739 llvm::GlobalVariable *DeviceImages = 3740 DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images", 3741 CGM.getPointerAlign(), 3742 /*isConstant=*/true); 3743 DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3744 3745 // This is a Zero array to be used in the creation of the constant expressions 3746 llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), 3747 llvm::Constant::getNullValue(CGM.Int32Ty)}; 3748 3749 // Create the target region descriptor. 3750 llvm::Constant *Data[] = { 3751 llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()), 3752 llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), 3753 DeviceImages, Index), 3754 HostEntriesBegin, HostEntriesEnd}; 3755 llvm::GlobalVariable *Desc = createConstantGlobalStruct( 3756 CGM, getTgtBinaryDescriptorQTy(), Data, ".omp_offloading.descriptor"); 3757 3758 // Emit code to register or unregister the descriptor at execution 3759 // startup or closing, respectively. 3760 3761 llvm::Function *UnRegFn; 3762 { 3763 FunctionArgList Args; 3764 ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); 3765 Args.push_back(&DummyPtr); 3766 3767 CodeGenFunction CGF(CGM); 3768 // Disable debug info for global (de-)initializer because they are not part 3769 // of some particular construct. 
3770 CGF.disableDebugInfo(); 3771 const auto &FI = 3772 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3773 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 3774 UnRegFn = CGM.CreateGlobalInitOrDestructFunction( 3775 FTy, ".omp_offloading.descriptor_unreg", FI); 3776 CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args); 3777 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), 3778 Desc); 3779 CGF.FinishFunction(); 3780 } 3781 llvm::Function *RegFn; 3782 { 3783 CodeGenFunction CGF(CGM); 3784 // Disable debug info for global (de-)initializer because they are not part 3785 // of some particular construct. 3786 CGF.disableDebugInfo(); 3787 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 3788 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 3789 RegFn = CGM.CreateGlobalInitOrDestructFunction( 3790 FTy, ".omp_offloading.descriptor_reg", FI); 3791 CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList()); 3792 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); 3793 // Create a variable to drive the registration and unregistration of the 3794 // descriptor, so we can reuse the logic that emits Ctors and Dtors. 3795 ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), 3796 SourceLocation(), nullptr, C.CharTy, 3797 ImplicitParamDecl::Other); 3798 CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); 3799 CGF.FinishFunction(); 3800 } 3801 if (CGM.supportsCOMDAT()) { 3802 // It is sufficient to call registration function only once, so create a 3803 // COMDAT group for registration/unregistration functions and associated 3804 // data. That would reduce startup time and code size. Registration 3805 // function serves as a COMDAT group key. 
3806 llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName()); 3807 RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); 3808 RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); 3809 RegFn->setComdat(ComdatKey); 3810 UnRegFn->setComdat(ComdatKey); 3811 DeviceImages->setComdat(ComdatKey); 3812 Desc->setComdat(ComdatKey); 3813 } 3814 return RegFn; 3815 } 3816 3817 void CGOpenMPRuntime::createOffloadEntry( 3818 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3819 llvm::GlobalValue::LinkageTypes Linkage) { 3820 StringRef Name = Addr->getName(); 3821 llvm::Module &M = CGM.getModule(); 3822 llvm::LLVMContext &C = M.getContext(); 3823 3824 // Create constant string with the name. 3825 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3826 3827 auto *Str = 3828 new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true, 3829 llvm::GlobalValue::InternalLinkage, StrPtrInit, 3830 ".omp_offloading.entry_name"); 3831 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3832 3833 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 3834 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 3835 llvm::ConstantInt::get(CGM.SizeTy, Size), 3836 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3837 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3838 llvm::GlobalVariable *Entry = createConstantGlobalStruct( 3839 CGM, getTgtOffloadEntryQTy(), Data, Twine(".omp_offloading.entry.", Name), 3840 Linkage); 3841 3842 // The entry has to be created in the section the linker expects it to be. 3843 Entry->setSection(".omp_offloading.entries"); 3844 } 3845 3846 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3847 // Emit the offloading entries and metadata so that the device codegen side 3848 // can easily figure out what to emit. 
The produced metadata looks like 3849 // this: 3850 // 3851 // !omp_offload.info = !{!1, ...} 3852 // 3853 // Right now we only generate metadata for function that contain target 3854 // regions. 3855 3856 // If we do not have entries, we don't need to do anything. 3857 if (OffloadEntriesInfoManager.empty()) 3858 return; 3859 3860 llvm::Module &M = CGM.getModule(); 3861 llvm::LLVMContext &C = M.getContext(); 3862 SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> 3863 OrderedEntries(OffloadEntriesInfoManager.size()); 3864 3865 // Auxiliary methods to create metadata values and strings. 3866 auto &&GetMDInt = [this](unsigned V) { 3867 return llvm::ConstantAsMetadata::get( 3868 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3869 }; 3870 3871 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3872 3873 // Create the offloading info metadata node. 3874 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3875 3876 // Create function that emits metadata for each target region entry; 3877 auto &&TargetRegionMetadataEmitter = 3878 [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString]( 3879 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3880 unsigned Line, 3881 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3882 // Generate metadata for target regions. Each entry of this metadata 3883 // contains: 3884 // - Entry 0 -> Kind of this type of metadata (0). 3885 // - Entry 1 -> Device ID of the file where the entry was identified. 3886 // - Entry 2 -> File ID of the file where the entry was identified. 3887 // - Entry 3 -> Mangled name of the function where the entry was 3888 // identified. 3889 // - Entry 4 -> Line in the file where the entry was identified. 3890 // - Entry 5 -> Order the entry was created. 3891 // The first element of the metadata node is the kind. 
3892 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3893 GetMDInt(FileID), GetMDString(ParentName), 3894 GetMDInt(Line), GetMDInt(E.getOrder())}; 3895 3896 // Save this entry in the right position of the ordered entries array. 3897 OrderedEntries[E.getOrder()] = &E; 3898 3899 // Add metadata to the named metadata node. 3900 MD->addOperand(llvm::MDNode::get(C, Ops)); 3901 }; 3902 3903 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3904 TargetRegionMetadataEmitter); 3905 3906 // Create function that emits metadata for each device global variable entry; 3907 auto &&DeviceGlobalVarMetadataEmitter = 3908 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3909 MD](StringRef MangledName, 3910 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3911 &E) { 3912 // Generate metadata for global variables. Each entry of this metadata 3913 // contains: 3914 // - Entry 0 -> Kind of this type of metadata (1). 3915 // - Entry 1 -> Mangled name of the variable. 3916 // - Entry 2 -> Declare target kind. 3917 // - Entry 3 -> Order the entry was created. 3918 // The first element of the metadata node is the kind. 3919 llvm::Metadata *Ops[] = { 3920 GetMDInt(E.getKind()), GetMDString(MangledName), 3921 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3922 3923 // Save this entry in the right position of the ordered entries array. 3924 OrderedEntries[E.getOrder()] = &E; 3925 3926 // Add metadata to the named metadata node. 
3927 MD->addOperand(llvm::MDNode::get(C, Ops)); 3928 }; 3929 3930 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3931 DeviceGlobalVarMetadataEmitter); 3932 3933 for (const auto *E : OrderedEntries) { 3934 assert(E && "All ordered entries must exist!"); 3935 if (const auto *CE = 3936 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3937 E)) { 3938 if (!CE->getID() || !CE->getAddress()) { 3939 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3940 DiagnosticsEngine::Error, 3941 "Offloading entry for target region is incorect: either the " 3942 "address or the ID is invalid."); 3943 CGM.getDiags().Report(DiagID); 3944 continue; 3945 } 3946 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3947 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3948 } else if (const auto *CE = 3949 dyn_cast<OffloadEntriesInfoManagerTy:: 3950 OffloadEntryInfoDeviceGlobalVar>(E)) { 3951 if (!CE->getAddress()) { 3952 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3953 DiagnosticsEngine::Error, 3954 "Offloading entry for declare target varible is inccorect: the " 3955 "address is invalid."); 3956 CGM.getDiags().Report(DiagID); 3957 continue; 3958 } 3959 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3960 CE->getVarSize().getQuantity(), CE->getFlags(), 3961 CE->getLinkage()); 3962 } else { 3963 llvm_unreachable("Unsupported entry kind."); 3964 } 3965 } 3966 } 3967 3968 /// \brief Loads all the offload entries information from the host IR 3969 /// metadata. 3970 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3971 // If we are in target mode, load the metadata from the host IR. This code has 3972 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
3973 3974 if (!CGM.getLangOpts().OpenMPIsDevice) 3975 return; 3976 3977 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3978 return; 3979 3980 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3981 if (auto EC = Buf.getError()) { 3982 CGM.getDiags().Report(diag::err_cannot_open_file) 3983 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3984 return; 3985 } 3986 3987 llvm::LLVMContext C; 3988 auto ME = expectedToErrorOrAndEmitErrors( 3989 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3990 3991 if (auto EC = ME.getError()) { 3992 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3993 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3994 CGM.getDiags().Report(DiagID) 3995 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3996 return; 3997 } 3998 3999 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 4000 if (!MD) 4001 return; 4002 4003 for (llvm::MDNode *MN : MD->operands()) { 4004 auto &&GetMDInt = [MN](unsigned Idx) { 4005 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 4006 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 4007 }; 4008 4009 auto &&GetMDString = [MN](unsigned Idx) { 4010 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 4011 return V->getString(); 4012 }; 4013 4014 switch (GetMDInt(0)) { 4015 default: 4016 llvm_unreachable("Unexpected metadata!"); 4017 break; 4018 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4019 OffloadingEntryInfoTargetRegion: 4020 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 4021 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 4022 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 4023 /*Order=*/GetMDInt(5)); 4024 break; 4025 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4026 OffloadingEntryInfoDeviceGlobalVar: 4027 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 4028 /*MangledName=*/GetMDString(1), 4029 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4030 /*Flags=*/GetMDInt(2)), 4031 /*Order=*/GetMDInt(3)); 4032 break; 4033 } 4034 } 4035 } 4036 4037 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 4038 if (!KmpRoutineEntryPtrTy) { 4039 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 4040 ASTContext &C = CGM.getContext(); 4041 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 4042 FunctionProtoType::ExtProtoInfo EPI; 4043 KmpRoutineEntryPtrQTy = C.getPointerType( 4044 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 4045 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 4046 } 4047 } 4048 4049 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 4050 // Make sure the type of the entry is already created. This is the type we 4051 // have to create: 4052 // struct __tgt_offload_entry{ 4053 // void *addr; // Pointer to the offload entry info. 4054 // // (function or global) 4055 // char *name; // Name of the function or global. 4056 // size_t size; // Size of the entry info (0 if it a function). 4057 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 4058 // int32_t reserved; // Reserved, to use by the runtime library. 
4059 // }; 4060 if (TgtOffloadEntryQTy.isNull()) { 4061 ASTContext &C = CGM.getContext(); 4062 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 4063 RD->startDefinition(); 4064 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4065 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 4066 addFieldToRecordDecl(C, RD, C.getSizeType()); 4067 addFieldToRecordDecl( 4068 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4069 addFieldToRecordDecl( 4070 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4071 RD->completeDefinition(); 4072 RD->addAttr(PackedAttr::CreateImplicit(C)); 4073 TgtOffloadEntryQTy = C.getRecordType(RD); 4074 } 4075 return TgtOffloadEntryQTy; 4076 } 4077 4078 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { 4079 // These are the types we need to build: 4080 // struct __tgt_device_image{ 4081 // void *ImageStart; // Pointer to the target code start. 4082 // void *ImageEnd; // Pointer to the target code end. 4083 // // We also add the host entries to the device image, as it may be useful 4084 // // for the target runtime to have access to that information. 4085 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all 4086 // // the entries. 4087 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4088 // // entries (non inclusive). 
4089 // }; 4090 if (TgtDeviceImageQTy.isNull()) { 4091 ASTContext &C = CGM.getContext(); 4092 RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image"); 4093 RD->startDefinition(); 4094 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4095 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4096 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4097 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4098 RD->completeDefinition(); 4099 TgtDeviceImageQTy = C.getRecordType(RD); 4100 } 4101 return TgtDeviceImageQTy; 4102 } 4103 4104 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { 4105 // struct __tgt_bin_desc{ 4106 // int32_t NumDevices; // Number of devices supported. 4107 // __tgt_device_image *DeviceImages; // Arrays of device images 4108 // // (one per device). 4109 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the 4110 // // entries. 4111 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4112 // // entries (non inclusive). 
4113 // }; 4114 if (TgtBinaryDescriptorQTy.isNull()) { 4115 ASTContext &C = CGM.getContext(); 4116 RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc"); 4117 RD->startDefinition(); 4118 addFieldToRecordDecl( 4119 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4120 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); 4121 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4122 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4123 RD->completeDefinition(); 4124 TgtBinaryDescriptorQTy = C.getRecordType(RD); 4125 } 4126 return TgtBinaryDescriptorQTy; 4127 } 4128 4129 namespace { 4130 struct PrivateHelpersTy { 4131 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 4132 const VarDecl *PrivateElemInit) 4133 : Original(Original), PrivateCopy(PrivateCopy), 4134 PrivateElemInit(PrivateElemInit) {} 4135 const VarDecl *Original; 4136 const VarDecl *PrivateCopy; 4137 const VarDecl *PrivateElemInit; 4138 }; 4139 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 4140 } // anonymous namespace 4141 4142 static RecordDecl * 4143 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 4144 if (!Privates.empty()) { 4145 ASTContext &C = CGM.getContext(); 4146 // Build struct .kmp_privates_t. 
{ 4147 // /* private vars */ 4148 // }; 4149 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 4150 RD->startDefinition(); 4151 for (const auto &Pair : Privates) { 4152 const VarDecl *VD = Pair.second.Original; 4153 QualType Type = VD->getType().getNonReferenceType(); 4154 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 4155 if (VD->hasAttrs()) { 4156 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 4157 E(VD->getAttrs().end()); 4158 I != E; ++I) 4159 FD->addAttr(*I); 4160 } 4161 } 4162 RD->completeDefinition(); 4163 return RD; 4164 } 4165 return nullptr; 4166 } 4167 4168 static RecordDecl * 4169 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 4170 QualType KmpInt32Ty, 4171 QualType KmpRoutineEntryPointerQTy) { 4172 ASTContext &C = CGM.getContext(); 4173 // Build struct kmp_task_t { 4174 // void * shareds; 4175 // kmp_routine_entry_t routine; 4176 // kmp_int32 part_id; 4177 // kmp_cmplrdata_t data1; 4178 // kmp_cmplrdata_t data2; 4179 // For taskloops additional fields: 4180 // kmp_uint64 lb; 4181 // kmp_uint64 ub; 4182 // kmp_int64 st; 4183 // kmp_int32 liter; 4184 // void * reductions; 4185 // }; 4186 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 4187 UD->startDefinition(); 4188 addFieldToRecordDecl(C, UD, KmpInt32Ty); 4189 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 4190 UD->completeDefinition(); 4191 QualType KmpCmplrdataTy = C.getRecordType(UD); 4192 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 4193 RD->startDefinition(); 4194 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4195 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 4196 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4197 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4198 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4199 if (isOpenMPTaskLoopDirective(Kind)) { 4200 QualType KmpUInt64Ty = 4201 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 4202 QualType KmpInt64Ty = 4203 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 4204 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4205 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4206 addFieldToRecordDecl(C, RD, KmpInt64Ty); 4207 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4208 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4209 } 4210 RD->completeDefinition(); 4211 return RD; 4212 } 4213 4214 static RecordDecl * 4215 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 4216 ArrayRef<PrivateDataTy> Privates) { 4217 ASTContext &C = CGM.getContext(); 4218 // Build struct kmp_task_t_with_privates { 4219 // kmp_task_t task_data; 4220 // .kmp_privates_t. privates; 4221 // }; 4222 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 4223 RD->startDefinition(); 4224 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 4225 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 4226 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 4227 RD->completeDefinition(); 4228 return RD; 4229 } 4230 4231 /// \brief Emit a proxy function which accepts kmp_task_t as the second 4232 /// argument. 
4233 /// \code 4234 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 4235 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 4236 /// For taskloops: 4237 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4238 /// tt->reductions, tt->shareds); 4239 /// return 0; 4240 /// } 4241 /// \endcode 4242 static llvm::Value * 4243 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 4244 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 4245 QualType KmpTaskTWithPrivatesPtrQTy, 4246 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 4247 QualType SharedsPtrTy, llvm::Value *TaskFunction, 4248 llvm::Value *TaskPrivatesMap) { 4249 ASTContext &C = CGM.getContext(); 4250 FunctionArgList Args; 4251 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4252 ImplicitParamDecl::Other); 4253 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4254 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4255 ImplicitParamDecl::Other); 4256 Args.push_back(&GtidArg); 4257 Args.push_back(&TaskTypeArg); 4258 const auto &TaskEntryFnInfo = 4259 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4260 llvm::FunctionType *TaskEntryTy = 4261 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 4262 auto *TaskEntry = 4263 llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage, 4264 ".omp_task_entry.", &CGM.getModule()); 4265 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 4266 TaskEntry->setDoesNotRecurse(); 4267 CodeGenFunction CGF(CGM); 4268 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 4269 Loc, Loc); 4270 4271 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 4272 // tt, 4273 // For taskloops: 4274 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4275 // tt->task_data.shareds); 4276 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 4277 
CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 4278 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4279 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4280 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4281 const auto *KmpTaskTWithPrivatesQTyRD = 4282 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4283 LValue Base = 4284 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4285 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4286 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 4287 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 4288 llvm::Value *PartidParam = PartIdLVal.getPointer(); 4289 4290 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 4291 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 4292 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4293 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 4294 CGF.ConvertTypeForMem(SharedsPtrTy)); 4295 4296 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4297 llvm::Value *PrivatesParam; 4298 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 4299 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 4300 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4301 PrivatesLVal.getPointer(), CGF.VoidPtrTy); 4302 } else { 4303 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4304 } 4305 4306 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 4307 TaskPrivatesMap, 4308 CGF.Builder 4309 .CreatePointerBitCastOrAddrSpaceCast( 4310 TDBase.getAddress(), CGF.VoidPtrTy) 4311 .getPointer()}; 4312 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 4313 std::end(CommonArgs)); 4314 if (isOpenMPTaskLoopDirective(Kind)) { 4315 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 4316 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 4317 
llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 4318 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 4319 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 4320 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 4321 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 4322 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 4323 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 4324 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4325 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4326 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 4327 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 4328 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 4329 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 4330 CallArgs.push_back(LBParam); 4331 CallArgs.push_back(UBParam); 4332 CallArgs.push_back(StParam); 4333 CallArgs.push_back(LIParam); 4334 CallArgs.push_back(RParam); 4335 } 4336 CallArgs.push_back(SharedsParam); 4337 4338 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 4339 CallArgs); 4340 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 4341 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 4342 CGF.FinishFunction(); 4343 return TaskEntry; 4344 } 4345 4346 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 4347 SourceLocation Loc, 4348 QualType KmpInt32Ty, 4349 QualType KmpTaskTWithPrivatesPtrQTy, 4350 QualType KmpTaskTWithPrivatesQTy) { 4351 ASTContext &C = CGM.getContext(); 4352 FunctionArgList Args; 4353 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4354 ImplicitParamDecl::Other); 4355 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4356 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4357 ImplicitParamDecl::Other); 4358 Args.push_back(&GtidArg); 4359 Args.push_back(&TaskTypeArg); 4360 const auto &DestructorFnInfo = 4361 
CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4362 llvm::FunctionType *DestructorFnTy = 4363 CGM.getTypes().GetFunctionType(DestructorFnInfo); 4364 auto *DestructorFn = 4365 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 4366 ".omp_task_destructor.", &CGM.getModule()); 4367 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 4368 DestructorFnInfo); 4369 DestructorFn->setDoesNotRecurse(); 4370 CodeGenFunction CGF(CGM); 4371 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 4372 Args, Loc, Loc); 4373 4374 LValue Base = CGF.EmitLoadOfPointerLValue( 4375 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4376 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4377 const auto *KmpTaskTWithPrivatesQTyRD = 4378 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4379 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4380 Base = CGF.EmitLValueForField(Base, *FI); 4381 for (const auto *Field : 4382 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 4383 if (QualType::DestructionKind DtorKind = 4384 Field->getType().isDestructedType()) { 4385 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 4386 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 4387 } 4388 } 4389 CGF.FinishFunction(); 4390 return DestructorFn; 4391 } 4392 4393 /// \brief Emit a privates mapping function for correct handling of private and 4394 /// firstprivate variables. 4395 /// \code 4396 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 4397 /// **noalias priv1,..., <tyn> **noalias privn) { 4398 /// *priv1 = &.privates.priv1; 4399 /// ...; 4400 /// *privn = &.privates.privn; 4401 /// } 4402 /// \endcode 4403 static llvm::Value * 4404 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 4405 ArrayRef<const Expr *> PrivateVars, 4406 ArrayRef<const Expr *> FirstprivateVars, 4407 ArrayRef<const Expr *> LastprivateVars, 4408 QualType PrivatesQTy, 4409 ArrayRef<PrivateDataTy> Privates) { 4410 ASTContext &C = CGM.getContext(); 4411 FunctionArgList Args; 4412 ImplicitParamDecl TaskPrivatesArg( 4413 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4414 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 4415 ImplicitParamDecl::Other); 4416 Args.push_back(&TaskPrivatesArg); 4417 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 4418 unsigned Counter = 1; 4419 for (const Expr *E : PrivateVars) { 4420 Args.push_back(ImplicitParamDecl::Create( 4421 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4422 C.getPointerType(C.getPointerType(E->getType())) 4423 .withConst() 4424 .withRestrict(), 4425 ImplicitParamDecl::Other)); 4426 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4427 PrivateVarsPos[VD] = Counter; 4428 ++Counter; 4429 } 4430 for (const Expr *E : FirstprivateVars) { 4431 Args.push_back(ImplicitParamDecl::Create( 4432 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4433 C.getPointerType(C.getPointerType(E->getType())) 4434 .withConst() 4435 .withRestrict(), 4436 ImplicitParamDecl::Other)); 4437 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4438 PrivateVarsPos[VD] = Counter; 4439 ++Counter; 4440 } 4441 for (const Expr *E : LastprivateVars) { 4442 Args.push_back(ImplicitParamDecl::Create( 4443 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4444 C.getPointerType(C.getPointerType(E->getType())) 4445 .withConst() 4446 .withRestrict(), 4447 ImplicitParamDecl::Other)); 4448 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4449 
PrivateVarsPos[VD] = Counter; 4450 ++Counter; 4451 } 4452 const auto &TaskPrivatesMapFnInfo = 4453 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4454 llvm::FunctionType *TaskPrivatesMapTy = 4455 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 4456 auto *TaskPrivatesMap = llvm::Function::Create( 4457 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, 4458 ".omp_task_privates_map.", &CGM.getModule()); 4459 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 4460 TaskPrivatesMapFnInfo); 4461 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 4462 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 4463 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 4464 CodeGenFunction CGF(CGM); 4465 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 4466 TaskPrivatesMapFnInfo, Args, Loc, Loc); 4467 4468 // *privi = &.privates.privi; 4469 LValue Base = CGF.EmitLoadOfPointerLValue( 4470 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 4471 TaskPrivatesArg.getType()->castAs<PointerType>()); 4472 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 4473 Counter = 0; 4474 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 4475 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 4476 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4477 LValue RefLVal = 4478 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4479 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4480 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 4481 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 4482 ++Counter; 4483 } 4484 CGF.FinishFunction(); 4485 return TaskPrivatesMap; 4486 } 4487 4488 static bool stable_sort_comparator(const PrivateDataTy P1, 4489 const PrivateDataTy P2) { 4490 return P1.first > P2.first; 4491 } 4492 4493 /// Emit initialization for private variables in task-based directives. 
4494 static void emitPrivatesInit(CodeGenFunction &CGF, 4495 const OMPExecutableDirective &D, 4496 Address KmpTaskSharedsPtr, LValue TDBase, 4497 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4498 QualType SharedsTy, QualType SharedsPtrTy, 4499 const OMPTaskDataTy &Data, 4500 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 4501 ASTContext &C = CGF.getContext(); 4502 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4503 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 4504 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 4505 ? OMPD_taskloop 4506 : OMPD_task; 4507 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 4508 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 4509 LValue SrcBase; 4510 bool IsTargetTask = 4511 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 4512 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 4513 // For target-based directives skip 3 firstprivate arrays BasePointersArray, 4514 // PointersArray and SizesArray. The original variables for these arrays are 4515 // not captured and we get their addresses explicitly. 
4516 if ((!IsTargetTask && !Data.FirstprivateVars.empty()) || 4517 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 4518 SrcBase = CGF.MakeAddrLValue( 4519 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4520 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 4521 SharedsTy); 4522 } 4523 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 4524 for (const PrivateDataTy &Pair : Privates) { 4525 const VarDecl *VD = Pair.second.PrivateCopy; 4526 const Expr *Init = VD->getAnyInitializer(); 4527 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 4528 !CGF.isTrivialInitializer(Init)))) { 4529 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 4530 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 4531 const VarDecl *OriginalVD = Pair.second.Original; 4532 // Check if the variable is the target-based BasePointersArray, 4533 // PointersArray or SizesArray. 4534 LValue SharedRefLValue; 4535 QualType Type = OriginalVD->getType(); 4536 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 4537 if (IsTargetTask && !SharedField) { 4538 assert(isa<ImplicitParamDecl>(OriginalVD) && 4539 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 4540 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4541 ->getNumParams() == 0 && 4542 isa<TranslationUnitDecl>( 4543 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4544 ->getDeclContext()) && 4545 "Expected artificial target data variable."); 4546 SharedRefLValue = 4547 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 4548 } else { 4549 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 4550 SharedRefLValue = CGF.MakeAddrLValue( 4551 Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), 4552 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 4553 SharedRefLValue.getTBAAInfo()); 4554 } 4555 if (Type->isArrayType()) { 4556 // Initialize firstprivate array. 
4557 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 4558 // Perform simple memcpy. 4559 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 4560 } else { 4561 // Initialize firstprivate array using element-by-element 4562 // initialization. 4563 CGF.EmitOMPAggregateAssign( 4564 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, 4565 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 4566 Address SrcElement) { 4567 // Clean up any temporaries needed by the initialization. 4568 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4569 InitScope.addPrivate( 4570 Elem, [SrcElement]() -> Address { return SrcElement; }); 4571 (void)InitScope.Privatize(); 4572 // Emit initialization for single element. 4573 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 4574 CGF, &CapturesInfo); 4575 CGF.EmitAnyExprToMem(Init, DestElement, 4576 Init->getType().getQualifiers(), 4577 /*IsInitializer=*/false); 4578 }); 4579 } 4580 } else { 4581 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4582 InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { 4583 return SharedRefLValue.getAddress(); 4584 }); 4585 (void)InitScope.Privatize(); 4586 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 4587 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 4588 /*capturedByInit=*/false); 4589 } 4590 } else { 4591 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 4592 } 4593 } 4594 ++FI; 4595 } 4596 } 4597 4598 /// Check if duplication function is required for taskloops. 
4599 static bool checkInitIsRequired(CodeGenFunction &CGF, 4600 ArrayRef<PrivateDataTy> Privates) { 4601 bool InitRequired = false; 4602 for (const PrivateDataTy &Pair : Privates) { 4603 const VarDecl *VD = Pair.second.PrivateCopy; 4604 const Expr *Init = VD->getAnyInitializer(); 4605 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4606 !CGF.isTrivialInitializer(Init)); 4607 if (InitRequired) 4608 break; 4609 } 4610 return InitRequired; 4611 } 4612 4613 4614 /// Emit task_dup function (for initialization of 4615 /// private/firstprivate/lastprivate vars and last_iter flag) 4616 /// \code 4617 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4618 /// lastpriv) { 4619 /// // setup lastprivate flag 4620 /// task_dst->last = lastpriv; 4621 /// // could be constructor calls here... 4622 /// } 4623 /// \endcode 4624 static llvm::Value * 4625 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4626 const OMPExecutableDirective &D, 4627 QualType KmpTaskTWithPrivatesPtrQTy, 4628 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4629 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4630 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4631 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4632 ASTContext &C = CGM.getContext(); 4633 FunctionArgList Args; 4634 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4635 KmpTaskTWithPrivatesPtrQTy, 4636 ImplicitParamDecl::Other); 4637 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4638 KmpTaskTWithPrivatesPtrQTy, 4639 ImplicitParamDecl::Other); 4640 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4641 ImplicitParamDecl::Other); 4642 Args.push_back(&DstArg); 4643 Args.push_back(&SrcArg); 4644 Args.push_back(&LastprivArg); 4645 const auto &TaskDupFnInfo = 4646 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4647 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 
4648 auto *TaskDup = 4649 llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage, 4650 ".omp_task_dup.", &CGM.getModule()); 4651 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4652 TaskDup->setDoesNotRecurse(); 4653 CodeGenFunction CGF(CGM); 4654 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4655 Loc); 4656 4657 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4658 CGF.GetAddrOfLocalVar(&DstArg), 4659 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4660 // task_dst->liter = lastpriv; 4661 if (WithLastIter) { 4662 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4663 LValue Base = CGF.EmitLValueForField( 4664 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4665 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4666 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4667 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4668 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4669 } 4670 4671 // Emit initial values for private copies (if any). 4672 assert(!Privates.empty()); 4673 Address KmpTaskSharedsPtr = Address::invalid(); 4674 if (!Data.FirstprivateVars.empty()) { 4675 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4676 CGF.GetAddrOfLocalVar(&SrcArg), 4677 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4678 LValue Base = CGF.EmitLValueForField( 4679 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4680 KmpTaskSharedsPtr = Address( 4681 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4682 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4683 KmpTaskTShareds)), 4684 Loc), 4685 CGF.getNaturalTypeAlignment(SharedsTy)); 4686 } 4687 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4688 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4689 CGF.FinishFunction(); 4690 return TaskDup; 4691 } 4692 4693 /// Checks if destructor function is required to be generated. 4694 /// \return true if cleanups are required, false otherwise. 
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
  bool NeedsCleanup = false;
  // The second field (index 1) of the "kmp_task_t with privates" record is the
  // record that holds the private copies.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
  // Stop at the first private field whose type reports that it is destructed.
  for (const FieldDecl *FD : PrivateRD->fields()) {
    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
    if (NeedsCleanup)
      break;
  }
  return NeedsCleanup;
}

/// Emits the code that allocates and initializes a task object for the
/// directive \a D and returns the values later needed to enqueue it.
///
/// Steps performed, in emission order:
///  - gather private/firstprivate/lastprivate variables into one sorted list;
///  - build (or reuse the cached) kmp_task_t record type and the per-task
///    "kmp_task_t with privates" record type;
///  - build the task-privates mapping function and the proxy task entry;
///  - call __kmpc_omp_task_alloc;
///  - copy the shareds and initialize the private copies;
///  - store the destructors thunk and the priority into the task (if needed).
///
/// \param TaskFunction Outlined task function; its 4th parameter type is used
/// as the type of the privates-mapping function pointer.
/// \param SharedsTy Type of the record with the shared variables.
/// \param Shareds Address of the record with the shared variables.
/// \param Data Task clauses data (privates, final, priority, tied, ...).
/// \return Result with NewTask, TaskEntry, NewTaskNewTaskTTy, TDBase,
/// KmpTaskTQTyRD and, for taskloops that need it, TaskDupFn filled in.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Value *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // Each entry pairs the declared alignment of the original variable with the
  // original variable, its private copy and (firstprivate only) the helper
  // variable used for element-wise initialization.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // NOTE(review): stable_sort_comparator is defined outside this chunk; the
  // comment above says the ordering is by alignment - confirm at its
  // definition.
  std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator);
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives and all other task-generating directives use separate
  // cached record types (SavedKmpTaskloopTQTy vs. SavedKmpTaskTQTy).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Build the task-privates mapping function (only if there are privates);
  // otherwise a null pointer of the expected type is passed instead.
  llvm::Value *TaskPrivatesMap = nullptr;
  // The expected pointer type is that of the 4th parameter (index 3) of the
  // outlined task function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Value *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  // Flags known at compile time are accumulated in 'Flags'.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // The 'final' flag is either a runtime value (select on the pointer part of
  // Data.Final) or a compile-time constant (the int part of Data.Final).
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  // View the returned task pointer as a pointer to the per-task
  // "kmp_task_t with privates" record.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  // TDBase designates the first field of that record, i.e. the kmp_task_t part.
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    // Load the 'shareds' pointer stored in the task and copy the caller's
    // shareds record into the memory it points to.
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task duplication function is generated only for taskloop directives
    // that have lastprivates or for which checkInitIsRequired() returns true.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  // The union declaration is taken from the Data1 field and reused below for
  // the Data2 field as well.
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

/// Emits the code for a task-generating directive: allocates and initializes
/// the task via emitTaskInit(), builds the dependence array (if any), and then
/// either enqueues the task through the runtime or, on the 'else' path of an
/// 'if' clause, waits for the dependences and calls the task entry directly.
///
/// Does nothing if \a CGF has no insert point.
///
/// \param IfCond Expression of the 'if' clause, or null if there is none.
/// \param Data Task clauses data; Data.Dependences and Data.Tied are read
/// here, the rest is forwarded to emitTaskInit().
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Value *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Value *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  ASTContext &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
    // Field indices inside the kmp_depend_info record built below.
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    // The flags field is an unsigned integer with the same bit width as 'bool'.
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Build the kmp_depend_info record { intptr_t; size_t; flags } once and
    // cache it in KmpDependInfoTy.
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned I = 0; I < NumDependencies; ++I) {
      const Expr *E = Data.Dependences[I].second;
      LValue Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (const auto *ASE =
              dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: the length is computed as
        // (address of the section's upper element + 1 element) minus the
        // address emitted for the expression itself.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        // Any other expression: the length is the size of its type.
        Size = CGF.getTypeSize(Ty);
      }
      LValue Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, I, DependencySize),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[I].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      // The remaining dependence kinds are not expected on this path.
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      LValue FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // From here on DependenciesArray is a void* to the first array element,
    // which is the form passed to the runtime calls below.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
        CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // DepTaskArgs is filled (and later read) only when there are dependences.
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    // No 'noalias' dependences are passed: count 0 and a null list.
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'Then' path: hand the task over to the runtime. The argument arrays are
  // captured by reference; the lambda is only invoked before this function
  // returns (see the end of the function).
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
                        &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // For untied tasks store 0 into the part_id field of the task.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // DepWaitTaskArgs is filled (and later read) only when there are
  // dependences.
  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'Else' path: wait for the dependences (if any) and call the task entry
  // directly, wrapped into the begin_if0/complete_if0 action.
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // NOTE(review): CommonActionTy is defined outside this chunk; presumably
    // the first callee/args pair is emitted on entry and the second on exit -
    // confirm at its definition.
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  // With an 'if' clause both paths are passed to emitOMPIfClause(); without it
  // only the 'then' path is emitted.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

/// Emits the code for a taskloop directive: allocates and initializes the task
/// via emitTaskInit(), stores the loop bounds, stride and reductions pointer
/// into the task record and calls __kmpc_taskloop.
///
/// Does nothing if \a CGF has no insert point.
///
/// \param D Loop directive providing the lower bound, upper bound and stride
/// variables whose initializers are stored into the task.
/// \param IfCond Expression of the 'if' clause, or null; without it the
/// constant 1 is passed as 'if_val'.
/// \param Data Task clauses data; Data.Reductions and Data.Schedule are read
/// here, the rest is forwarded to emitTaskInit().
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Value *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Evaluate the initializers of the lower bound, upper bound and stride
  // variables directly into the corresponding fields of the task record.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    // No reductions: null-initialize the field.
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // Values of the 'sched' argument: a schedule expression is present when
  // Data.Schedule has a pointer; its int part selects NumTasks (set) or
  // Grainsize (unset).
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getNullValue(
          CGF.IntTy), // Always 0 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}

/// \brief Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
5193 static void EmitOMPAggregateReduction( 5194 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5195 const VarDecl *RHSVar, 5196 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5197 const Expr *, const Expr *)> &RedOpGen, 5198 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5199 const Expr *UpExpr = nullptr) { 5200 // Perform element-by-element initialization. 5201 QualType ElementTy; 5202 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5203 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5204 5205 // Drill down to the base element type on both arrays. 5206 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5207 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5208 5209 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5210 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5211 // Cast from pointer to array type to pointer to single element. 5212 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5213 // The basic structure here is a while-do loop. 5214 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5215 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5216 llvm::Value *IsEmpty = 5217 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5218 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5219 5220 // Enter the loop body, making that address the current address. 
5221 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5222 CGF.EmitBlock(BodyBB); 5223 5224 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5225 5226 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5227 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5228 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5229 Address RHSElementCurrent = 5230 Address(RHSElementPHI, 5231 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5232 5233 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5234 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5235 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5236 Address LHSElementCurrent = 5237 Address(LHSElementPHI, 5238 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5239 5240 // Emit copy. 5241 CodeGenFunction::OMPPrivateScope Scope(CGF); 5242 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5243 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5244 Scope.Privatize(); 5245 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5246 Scope.ForceCleanup(); 5247 5248 // Shift the address forward by one element. 5249 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5250 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5251 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5252 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5253 // Check whether we've reached the end. 5254 llvm::Value *Done = 5255 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5256 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5257 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5258 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5259 5260 // Done. 5261 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5262 } 5263 5264 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5265 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5266 /// UDR combiner function. 5267 static void emitReductionCombiner(CodeGenFunction &CGF, 5268 const Expr *ReductionOp) { 5269 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5270 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5271 if (const auto *DRE = 5272 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5273 if (const auto *DRD = 5274 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5275 std::pair<llvm::Function *, llvm::Function *> Reduction = 5276 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5277 RValue Func = RValue::get(Reduction.first); 5278 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5279 CGF.EmitIgnoredExpr(ReductionOp); 5280 return; 5281 } 5282 CGF.EmitIgnoredExpr(ReductionOp); 5283 } 5284 5285 llvm::Value *CGOpenMPRuntime::emitReductionFunction( 5286 CodeGenModule &CGM, SourceLocation Loc, llvm::Type *ArgsType, 5287 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, 5288 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) { 5289 ASTContext &C = CGM.getContext(); 5290 5291 // void reduction_func(void *LHSArg, void *RHSArg); 5292 FunctionArgList Args; 5293 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5294 ImplicitParamDecl::Other); 5295 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5296 ImplicitParamDecl::Other); 5297 Args.push_back(&LHSArg); 5298 Args.push_back(&RHSArg); 5299 const auto &CGFI = 5300 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5301 auto *Fn = llvm::Function::Create( 5302 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 5303 ".omp.reduction.reduction_func", &CGM.getModule()); 5304 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5305 Fn->setDoesNotRecurse(); 5306 CodeGenFunction 
CGF(CGM); 5307 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5308 5309 // Dst = (void*[n])(LHSArg); 5310 // Src = (void*[n])(RHSArg); 5311 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5312 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5313 ArgsType), CGF.getPointerAlign()); 5314 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5315 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5316 ArgsType), CGF.getPointerAlign()); 5317 5318 // ... 5319 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5320 // ... 5321 CodeGenFunction::OMPPrivateScope Scope(CGF); 5322 auto IPriv = Privates.begin(); 5323 unsigned Idx = 0; 5324 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5325 const auto *RHSVar = 5326 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5327 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5328 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5329 }); 5330 const auto *LHSVar = 5331 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5332 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5333 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5334 }); 5335 QualType PrivTy = (*IPriv)->getType(); 5336 if (PrivTy->isVariablyModifiedType()) { 5337 // Get array size and emit VLA type. 
5338 ++Idx; 5339 Address Elem = 5340 CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize()); 5341 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5342 const VariableArrayType *VLA = 5343 CGF.getContext().getAsVariableArrayType(PrivTy); 5344 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5345 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5346 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5347 CGF.EmitVariablyModifiedType(PrivTy); 5348 } 5349 } 5350 Scope.Privatize(); 5351 IPriv = Privates.begin(); 5352 auto ILHS = LHSExprs.begin(); 5353 auto IRHS = RHSExprs.begin(); 5354 for (const Expr *E : ReductionOps) { 5355 if ((*IPriv)->getType()->isArrayType()) { 5356 // Emit reduction for array section. 5357 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5358 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5359 EmitOMPAggregateReduction( 5360 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5361 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5362 emitReductionCombiner(CGF, E); 5363 }); 5364 } else { 5365 // Emit reduction for array subscript or single variable. 5366 emitReductionCombiner(CGF, E); 5367 } 5368 ++IPriv; 5369 ++ILHS; 5370 ++IRHS; 5371 } 5372 Scope.ForceCleanup(); 5373 CGF.FinishFunction(); 5374 return Fn; 5375 } 5376 5377 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5378 const Expr *ReductionOp, 5379 const Expr *PrivateRef, 5380 const DeclRefExpr *LHS, 5381 const DeclRefExpr *RHS) { 5382 if (PrivateRef->getType()->isArrayType()) { 5383 // Emit reduction for array section. 
5384 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5385 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5386 EmitOMPAggregateReduction( 5387 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5388 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5389 emitReductionCombiner(CGF, ReductionOp); 5390 }); 5391 } else { 5392 // Emit reduction for array subscript or single variable. 5393 emitReductionCombiner(CGF, ReductionOp); 5394 } 5395 } 5396 5397 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5398 ArrayRef<const Expr *> Privates, 5399 ArrayRef<const Expr *> LHSExprs, 5400 ArrayRef<const Expr *> RHSExprs, 5401 ArrayRef<const Expr *> ReductionOps, 5402 ReductionOptionsTy Options) { 5403 if (!CGF.HaveInsertPoint()) 5404 return; 5405 5406 bool WithNowait = Options.WithNowait; 5407 bool SimpleReduction = Options.SimpleReduction; 5408 5409 // Next code should be emitted for reduction: 5410 // 5411 // static kmp_critical_name lock = { 0 }; 5412 // 5413 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5414 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5415 // ... 5416 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5417 // *(Type<n>-1*)rhs[<n>-1]); 5418 // } 5419 // 5420 // ... 5421 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5422 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5423 // RedList, reduce_func, &<lock>)) { 5424 // case 1: 5425 // ... 5426 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5427 // ... 5428 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5429 // break; 5430 // case 2: 5431 // ... 5432 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5433 // ... 5434 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5435 // break; 5436 // default:; 5437 // } 5438 // 5439 // if SimpleReduction is true, only the next code is generated: 5440 // ... 
5441 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5442 // ... 5443 5444 ASTContext &C = CGM.getContext(); 5445 5446 if (SimpleReduction) { 5447 CodeGenFunction::RunCleanupsScope Scope(CGF); 5448 auto IPriv = Privates.begin(); 5449 auto ILHS = LHSExprs.begin(); 5450 auto IRHS = RHSExprs.begin(); 5451 for (const Expr *E : ReductionOps) { 5452 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5453 cast<DeclRefExpr>(*IRHS)); 5454 ++IPriv; 5455 ++ILHS; 5456 ++IRHS; 5457 } 5458 return; 5459 } 5460 5461 // 1. Build a list of reduction variables. 5462 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5463 auto Size = RHSExprs.size(); 5464 for (const Expr *E : Privates) { 5465 if (E->getType()->isVariablyModifiedType()) 5466 // Reserve place for array size. 5467 ++Size; 5468 } 5469 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5470 QualType ReductionArrayTy = 5471 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 5472 /*IndexTypeQuals=*/0); 5473 Address ReductionList = 5474 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5475 auto IPriv = Privates.begin(); 5476 unsigned Idx = 0; 5477 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5478 Address Elem = 5479 CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize()); 5480 CGF.Builder.CreateStore( 5481 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5482 CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), 5483 Elem); 5484 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5485 // Store array size. 
5486 ++Idx; 5487 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, 5488 CGF.getPointerSize()); 5489 llvm::Value *Size = CGF.Builder.CreateIntCast( 5490 CGF.getVLASize( 5491 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5492 .NumElts, 5493 CGF.SizeTy, /*isSigned=*/false); 5494 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5495 Elem); 5496 } 5497 } 5498 5499 // 2. Emit reduce_func(). 5500 llvm::Value *ReductionFn = emitReductionFunction( 5501 CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), 5502 Privates, LHSExprs, RHSExprs, ReductionOps); 5503 5504 // 3. Create static kmp_critical_name lock = { 0 }; 5505 llvm::Value *Lock = getCriticalRegionLock(".reduction"); 5506 5507 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5508 // RedList, reduce_func, &<lock>); 5509 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5510 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5511 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5512 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5513 ReductionList.getPointer(), CGF.VoidPtrTy); 5514 llvm::Value *Args[] = { 5515 IdentTLoc, // ident_t *<loc> 5516 ThreadId, // i32 <gtid> 5517 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5518 ReductionArrayTySize, // size_type sizeof(RedList) 5519 RL, // void *RedList 5520 ReductionFn, // void (*) (void *, void *) <reduce_func> 5521 Lock // kmp_critical_name *&<lock> 5522 }; 5523 llvm::Value *Res = CGF.EmitRuntimeCall( 5524 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 5525 : OMPRTL__kmpc_reduce), 5526 Args); 5527 5528 // 5. Build switch(res) 5529 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5530 llvm::SwitchInst *SwInst = 5531 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5532 5533 // 6. Build case 1: 5534 // ... 
5535 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5536 // ... 5537 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5538 // break; 5539 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5540 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5541 CGF.EmitBlock(Case1BB); 5542 5543 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5544 llvm::Value *EndArgs[] = { 5545 IdentTLoc, // ident_t *<loc> 5546 ThreadId, // i32 <gtid> 5547 Lock // kmp_critical_name *&<lock> 5548 }; 5549 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5550 CodeGenFunction &CGF, PrePostActionTy &Action) { 5551 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5552 auto IPriv = Privates.begin(); 5553 auto ILHS = LHSExprs.begin(); 5554 auto IRHS = RHSExprs.begin(); 5555 for (const Expr *E : ReductionOps) { 5556 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5557 cast<DeclRefExpr>(*IRHS)); 5558 ++IPriv; 5559 ++ILHS; 5560 ++IRHS; 5561 } 5562 }; 5563 RegionCodeGenTy RCG(CodeGen); 5564 CommonActionTy Action( 5565 nullptr, llvm::None, 5566 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait 5567 : OMPRTL__kmpc_end_reduce), 5568 EndArgs); 5569 RCG.setAction(Action); 5570 RCG(CGF); 5571 5572 CGF.EmitBranch(DefaultBB); 5573 5574 // 7. Build case 2: 5575 // ... 5576 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5577 // ... 
5578 // break; 5579 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5580 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5581 CGF.EmitBlock(Case2BB); 5582 5583 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5584 CodeGenFunction &CGF, PrePostActionTy &Action) { 5585 auto ILHS = LHSExprs.begin(); 5586 auto IRHS = RHSExprs.begin(); 5587 auto IPriv = Privates.begin(); 5588 for (const Expr *E : ReductionOps) { 5589 const Expr *XExpr = nullptr; 5590 const Expr *EExpr = nullptr; 5591 const Expr *UpExpr = nullptr; 5592 BinaryOperatorKind BO = BO_Comma; 5593 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5594 if (BO->getOpcode() == BO_Assign) { 5595 XExpr = BO->getLHS(); 5596 UpExpr = BO->getRHS(); 5597 } 5598 } 5599 // Try to emit update expression as a simple atomic. 5600 const Expr *RHSExpr = UpExpr; 5601 if (RHSExpr) { 5602 // Analyze RHS part of the whole expression. 5603 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5604 RHSExpr->IgnoreParenImpCasts())) { 5605 // If this is a conditional operator, analyze its condition for 5606 // min/max reduction operator. 
5607 RHSExpr = ACO->getCond(); 5608 } 5609 if (const auto *BORHS = 5610 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5611 EExpr = BORHS->getRHS(); 5612 BO = BORHS->getOpcode(); 5613 } 5614 } 5615 if (XExpr) { 5616 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5617 auto &&AtomicRedGen = [BO, VD, 5618 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5619 const Expr *EExpr, const Expr *UpExpr) { 5620 LValue X = CGF.EmitLValue(XExpr); 5621 RValue E; 5622 if (EExpr) 5623 E = CGF.EmitAnyExpr(EExpr); 5624 CGF.EmitOMPAtomicSimpleUpdateExpr( 5625 X, E, BO, /*IsXLHSInRHSPart=*/true, 5626 llvm::AtomicOrdering::Monotonic, Loc, 5627 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5628 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5629 PrivateScope.addPrivate( 5630 VD, [&CGF, VD, XRValue, Loc]() { 5631 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5632 CGF.emitOMPSimpleStore( 5633 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5634 VD->getType().getNonReferenceType(), Loc); 5635 return LHSTemp; 5636 }); 5637 (void)PrivateScope.Privatize(); 5638 return CGF.EmitAnyExpr(UpExpr); 5639 }); 5640 }; 5641 if ((*IPriv)->getType()->isArrayType()) { 5642 // Emit atomic reduction for array section. 5643 const auto *RHSVar = 5644 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5645 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5646 AtomicRedGen, XExpr, EExpr, UpExpr); 5647 } else { 5648 // Emit atomic reduction for array subscript or single variable. 5649 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5650 } 5651 } else { 5652 // Emit as a critical region. 
// Fallback combiner: emit the reduction op inside the ".atomic_reduction"
// critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          RT.emitCriticalRegion(
              CGF, ".atomic_reduction",
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Array-typed private copy: pass the generator to
          // EmitOMPAggregateReduction together with the LHS/RHS variables.
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      // Advance the three parallel iterators in lock step.
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    // nowait: run the atomic codegen with no action attached.
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "." <Decl_name> "_" "<Decl_start_loc_raw_enc>", where
/// <Decl_name> is the plain name for local variables and parameters and the
/// mangled name for every other declaration.
///
/// \param CGM Module used to obtain the mangled name.
/// \param Prefix Leading component of the generated name.
/// \param Ref Expression naming the variable. The declaration is taken from
/// ::getBaseDecl(Ref, ...); when that returns null, \p Ref itself must be a
/// DeclRefExpr to a VarDecl (enforced by cast<>).
/// \return The generated name.
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  // Use the canonical declaration so every reference to the same variable
  // produces the same name.
  D = D->getCanonicalDecl();
  Out << Prefix << "."
      << (D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D))
      << "_" << D->getCanonicalDecl()->getLocStart().getRawEncoding();
  return Out.str();
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
///
/// \param CGM Module the function is added to.
/// \param Loc Source location used for the parameter and the function body.
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item within \p RCG.
/// \return The newly created internal-linkage function.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    ".red_init.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Load the void* argument: it is the address of the private copy.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
// A non-null second size component selects the run-time size path: the size is
// read back from the "reduction_size" artificial threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No custom initializer: pass an lvalue built on a null pointer.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
///
/// \param CGM Module the function is added to.
/// \param Loc Source location used for the parameters and the function body.
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item within \p RCG.
/// \param ReductionOp Combiner expression passed to
/// emitSingleReductionCombiner.
/// \param LHS Must be a DeclRefExpr to a VarDecl (enforced by cast<>); the
/// variable is remapped to the first (in/out) argument.
/// \param RHS Must be a DeclRefExpr to a VarDecl (enforced by cast<>); the
/// variable is remapped to the second (in) argument.
/// \param PrivateRef Private copy expression passed to
/// emitSingleReductionCombiner.
/// \return The newly created internal-linkage function.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    ".red_comb.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
///
/// \param CGM Module the function is added to.
/// \param Loc Source location used for the parameter and the function body.
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item within \p RCG.
/// \return The newly created internal-linkage function, or nullptr when
/// RCG.needCleanups(N) is false.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // Nothing to emit when the item needs no cleanups.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    ".red_fini.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
// Start emitting the body of the newly created function.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Load the void* argument: it is the address of the private copy.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction();
  return Fn;
}

/// Emits the per-item kmp_task_red_input_t descriptors for a task reduction
/// and the call to __kmpc_task_reduction_init.
///
/// For every entry of Data.ReductionVars one descriptor is filled with the
/// shared item address, its size, and the addresses of the generated
/// initializer, finalizer (or null) and combiner functions.
///
/// \param CGF Function into which the code is emitted.
/// \param Loc Source location used for emitted code.
/// \param LHSExprs Per-item LHS expressions, indexed in parallel with
/// Data.ReductionVars.
/// \param RHSExprs Per-item RHS expressions, indexed in parallel with
/// Data.ReductionVars.
/// \param Data Task data providing ReductionVars, ReductionCopies and
/// ReductionOps.
/// \return The result of the __kmpc_task_reduction_init call, or nullptr when
/// there is no insert point or no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = size in chars, cast to size_t;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini; (null when no finalizer was generated)
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}

/// Stores, into artificial threadprivate variables, the run-time size of
/// reduction item \p N (when it has one) and the address of the original
/// shared item (when a custom reduction initializer is used).
///
/// \param CGF Function into which the stores are emitted.
/// \param Loc Source location (not referenced by the stores themselves).
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item within \p RCG.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
// Publish the run-time size through the "reduction_size" artificial
// threadprivate variable.
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
  // Store address of the original reduction item if custom initializer is used.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().VoidPtrTy,
        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
        SharedAddr, /*IsVolatile=*/false);
  }
}

/// Emits a call to __kmpc_task_reduction_get_th_data and wraps the returned
/// pointer in an Address.
///
/// \param CGF Function into which the call is emitted.
/// \param Loc Source location used to obtain the thread id.
/// \param ReductionsPtr Passed through as the runtime call's 'tg' argument.
/// \param SharedLVal Shared item; its pointer is passed as 'd' and its
/// alignment is reused for the returned Address.
/// \return Address built from the call result.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      ReductionsPtr,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
                                                      CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
      SharedLVal.getAlignment());
}

/// Emits a call to __kmpc_omp_taskwait and, when emitting inside an OpenMP
/// region, that region's untied-task switch.
///
/// Does nothing when \p CGF has no insert point.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Ignore return result until untied tasks are supported.
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits the body of a directive that needs no outlining by running
/// \p CodeGen through an InlinedOpenMPRegionRAII region.
///
/// \param CGF Function into which the body is emitted.
/// \param InnerKind Directive kind recorded for the inlined region.
/// \param CodeGen Callback that generates the body.
/// \param HasCancel Whether the region is recorded as containing a cancel.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // Region stays alive for the duration of the EmitBody call below.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Values passed as the 'cncl_kind' argument of __kmpc_cancel and
/// __kmpc_cancellationpoint.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps the directive kind of a cancel region to the matching RTCancelKind.
///
/// \param CancelRegion One of OMPD_parallel, OMPD_for, OMPD_sections or
/// OMPD_taskgroup; any other value trips the assertion.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

/// Emits a call to __kmpc_cancellationpoint followed by a conditional branch
/// to the cancel destination of the enclosing OpenMP region.
///
/// Nothing is emitted when \p CGF has no insert point or is not inside an
/// OpenMP region.
///
/// \param CGF Function into which the code is emitted.
/// \param Loc Source location of the directive.
/// \param CancelRegion Kind of the region the cancellation point refers to.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
// Taskgroup cancellation points are always emitted; other kinds only when the
// enclosing region has a cancel.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The call result is tested below: non-zero exits the construct.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

/// Emits a call to __kmpc_cancel followed by a conditional branch to the
/// cancel destination of the enclosing OpenMP region.
///
/// Nothing is emitted when \p CGF has no insert point or is not inside an
/// OpenMP region.
///
/// \param CGF Function into which the code is emitted.
/// \param Loc Source location of the directive.
/// \param IfCond Optional 'if' clause condition; when non-null the call is
/// emitted through emitOMPIfClause with an empty else branch.
/// \param CancelRegion Kind of the region being cancelled.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The call result is tested below: non-zero exits the construct.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

/// Checks that \p ParentName is non-empty and forwards all arguments to
/// emitTargetOutlinedFunctionHelper.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

/// Generates the outlined function for the target region of \p D and, when
/// \p IsOffloadEntry is set, creates its region ID and registers the entry.
///
/// \param D Target directive whose OMPD_target captured statement is outlined.
/// \param ParentName Name component embedded in the generated entry name.
/// \param OutlinedFn [out] The generated outlined function.
/// \param OutlinedFnID [out] The region ID; only written when
/// \p IsOffloadEntry is true.
/// \param IsOffloadEntry Whether the function must be registered as an
/// offload entry.
/// \param CodeGen Callback that generates the region body.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.
// Device: the region ID is the outlined function's address. Host: the ID is a
// private constant i8 global named ".omp_offload.region_id".

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// Discard all CompoundStmts intervening between two constructs.
///
/// Repeatedly replaces \p Body with the front statement of the compound
/// statement it refers to until it is no longer a CompoundStmt (or is null).
///
/// \return The first non-compound statement reached; may be null.
static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
  while (const auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
    Body = CS->body_front();

  return Body;
}

/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// \param OMPRuntime Not referenced in the body of this function.
/// \param CGF Function into which the expression is emitted; must be
/// generating host code (asserted).
/// \param D The target directive.
/// \return An i32 value (num_teams expression, constant 0 for the runtime
/// default, or constant 1), or nullptr when no teams are associated.
static llvm::Value *
emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
                               CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
                                              "teams directive expected to be "
                                              "emitted only for the host!");

  CGBuilderTy &Bld = CGF.Builder;

  // If the target directive is combined with a teams directive:
  //   Return the value in the num_teams clause, if any.
  //   Otherwise, return 0 to denote the runtime default.
  if (isOpenMPTeamsDirective(D.getDirectiveKind())) {
    if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(),
                                                 /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
                               /*IsSigned=*/true);
    }

    // The default value is 0.
    return Bld.getInt32(0);
  }

  // If the target directive is combined with a parallel directive but not a
  // teams directive, start one team.
  if (isOpenMPParallelDirective(D.getDirectiveKind()))
    return Bld.getInt32(1);

  // If the current target region has a teams region enclosed, we need to get
  // the number of teams to pass to the runtime function call. This is done
  // by generating the expression in a inlined region. This is required because
  // the expression is captured in the enclosing target environment when the
  // teams directive is not combined with target.

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
          ignoreCompoundStmts(CS.getCapturedStmt()))) {
    if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
      if (const auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
        return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
                                 /*IsSigned=*/true);
      }

      // If we have an enclosed teams directive but no num_teams clause we use
      // the default value 0.
      return Bld.getInt32(0);
    }
  }

  // No teams associated with the directive.
  return nullptr;
}

/// Emit the number of threads for a target directive.
Inspect the 6336 /// thread_limit clause associated with a teams construct combined or closely 6337 /// nested with the target directive. 6338 /// 6339 /// Emit the num_threads clause for directives such as 'target parallel' that 6340 /// have no associated teams construct. 6341 /// 6342 /// Otherwise, return nullptr. 6343 static llvm::Value * 6344 emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, 6345 CodeGenFunction &CGF, 6346 const OMPExecutableDirective &D) { 6347 assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " 6348 "teams directive expected to be " 6349 "emitted only for the host!"); 6350 6351 CGBuilderTy &Bld = CGF.Builder; 6352 6353 // 6354 // If the target directive is combined with a teams directive: 6355 // Return the value in the thread_limit clause, if any. 6356 // 6357 // If the target directive is combined with a parallel directive: 6358 // Return the value in the num_threads clause, if any. 6359 // 6360 // If both clauses are set, select the minimum of the two. 6361 // 6362 // If neither teams or parallel combined directives set the number of threads 6363 // in a team, return 0 to denote the runtime default. 6364 // 6365 // If this is not a teams directive return nullptr. 
6366 6367 if (isOpenMPTeamsDirective(D.getDirectiveKind()) || 6368 isOpenMPParallelDirective(D.getDirectiveKind())) { 6369 llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0); 6370 llvm::Value *NumThreadsVal = nullptr; 6371 llvm::Value *ThreadLimitVal = nullptr; 6372 6373 if (const auto *ThreadLimitClause = 6374 D.getSingleClause<OMPThreadLimitClause>()) { 6375 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6376 llvm::Value *ThreadLimit = 6377 CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(), 6378 /*IgnoreResultAssign*/ true); 6379 ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, 6380 /*IsSigned=*/true); 6381 } 6382 6383 if (const auto *NumThreadsClause = 6384 D.getSingleClause<OMPNumThreadsClause>()) { 6385 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6386 llvm::Value *NumThreads = 6387 CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), 6388 /*IgnoreResultAssign*/ true); 6389 NumThreadsVal = 6390 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true); 6391 } 6392 6393 // Select the lesser of thread_limit and num_threads. 6394 if (NumThreadsVal) 6395 ThreadLimitVal = ThreadLimitVal 6396 ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal, 6397 ThreadLimitVal), 6398 NumThreadsVal, ThreadLimitVal) 6399 : NumThreadsVal; 6400 6401 // Set default value passed to the runtime if either teams or a target 6402 // parallel type directive is found but no clause is specified. 6403 if (!ThreadLimitVal) 6404 ThreadLimitVal = DefaultThreadLimitVal; 6405 6406 return ThreadLimitVal; 6407 } 6408 6409 // If the current target region has a teams region enclosed, we need to get 6410 // the thread limit to pass to the runtime function call. This is done 6411 // by generating the expression in a inlined region. This is required because 6412 // the expression is captured in the enclosing target environment when the 6413 // teams directive is not combined with target. 
6414 6415 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6416 6417 if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>( 6418 ignoreCompoundStmts(CS.getCapturedStmt()))) { 6419 if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) { 6420 if (const auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { 6421 CGOpenMPInnerExprInfo CGInfo(CGF, CS); 6422 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6423 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); 6424 return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, 6425 /*IsSigned=*/true); 6426 } 6427 6428 // If we have an enclosed teams directive but no thread_limit clause we 6429 // use the default value 0. 6430 return CGF.Builder.getInt32(0); 6431 } 6432 } 6433 6434 // No teams associated with the directive. 6435 return nullptr; 6436 } 6437 6438 namespace { 6439 // \brief Utility to handle information from clauses associated with a given 6440 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 6441 // It provides a convenient interface to obtain the information and generate 6442 // code for that information. 6443 class MappableExprsHandler { 6444 public: 6445 /// \brief Values for bit flags used to specify the mapping type for 6446 /// offloading. 6447 enum OpenMPOffloadMappingFlags { 6448 /// \brief Allocate memory on the device and move data from host to device. 6449 OMP_MAP_TO = 0x01, 6450 /// \brief Allocate memory on the device and move data from device to host. 6451 OMP_MAP_FROM = 0x02, 6452 /// \brief Always perform the requested mapping action on the element, even 6453 /// if it was already mapped before. 6454 OMP_MAP_ALWAYS = 0x04, 6455 /// \brief Delete the element from the device environment, ignoring the 6456 /// current reference count associated with the element. 
    OMP_MAP_DELETE = 0x08,
    /// \brief The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// \brief This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// \brief Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// \brief This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// \brief Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// \brief Implicit map. Added to the entries generated for a component
    /// list whose IsImplicit argument is set.
    OMP_MAP_IMPLICIT = 0x200,
  };

  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Dereferencing yields the stored base pointer value.
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy;
  typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy;
  typedef SmallVector<uint64_t, 16> MapFlagsArrayTy;

private:
  /// \brief Directive from where the map clauses were extracted.
  const OMPExecutableDirective &CurDir;

  /// \brief Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// \brief Set of all first private variables in the current directive.
  llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;
  /// Set of all reduction variables in the current directive.
  llvm::SmallPtrSet<const VarDecl *, 8> ReductionDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// \brief Compute the size value for the mappable expression \a E. For an
  /// array section the size is derived from the section (whole base, single
  /// element, or length times element size); for any other expression it is
  /// the size of the expression type with references stripped.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression, that means we
      // are using the whole length of the base.
      if (!OAE->getLength() && OAE->getColonLoc().isValid())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength())
        return ElemSize;

      // Size = length (converted to size_t as an unsigned value) * element
      // size.
      llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
      LengthVal =
          CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
      return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// \brief Return the OpenMPOffloadMappingFlags bits for a given map type
  /// and map type modifier. OMP_MAP_PTR_AND_OBJ is added when \a AddPtrFlag is
  /// set, OMP_MAP_TARGET_PARAM is added when \a AddIsTargetParamFlag is set,
  /// and OMP_MAP_ALWAYS is added when the modifier is 'always'.
  uint64_t getMapTypeBits(OpenMPMapClauseKind MapType,
                          OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag,
                          bool AddIsTargetParamFlag) const {
    uint64_t Bits = 0u;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release is the default behavior in the runtime library, i.e.
      // if we don't pass any bits alloc/release that is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits = OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits = OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits = OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits = OMP_MAP_DELETE;
      break;
    default:
      llvm_unreachable("Unexpected map type!");
      break;
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (MapTypeModifier == OMPC_MAP_always)
      Bits |= OMP_MAP_ALWAYS;
    return Bits;
  }

  /// \brief Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLoc().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be of unit size, that is the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    llvm::APSInt ConstLength;
    if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
      return true; // Can have more than size 1.

    return ConstLength.getSExtValue() != 1;
  }

  /// \brief Return the adjusted map modifiers if the declaration a capture
  /// refers to appears in a first-private or reduction clause; otherwise
  /// return \a CurrentModifiers unchanged. This is expected to be used only
  /// with directives that start with 'target'.
  unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap,
                                               unsigned CurrentModifiers) {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar()))
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    // A reduction variable will use only the 'map to' and 'map from' flags.
    if (ReductionDecls.count(Cap.getCapturedVar())) {
      return MappableExprsHandler::OMP_MAP_TO |
             MappableExprsHandler::OMP_MAP_FROM;
    }

    // We didn't modify anything.
    return CurrentModifiers;
  }

public:
  /// \brief Build a handler for directive \a Dir. The canonical declarations
  /// named in its firstprivate and reduction clauses, and the component lists
  /// of its is_device_ptr clauses, are recorded for later queries.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const Expr *D : C->varlists())
        FirstPrivateDecls.insert(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
    // Extract reduction clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPReductionClause>()) {
      for (const Expr *D : C->varlists()) {
        ReductionDecls.insert(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (const auto &L : C->component_lists())
        DevPointersMap[L.first].push_back(L.second);
  }

  /// \brief Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture. When \a IsImplicit is
  /// set, OMP_MAP_IMPLICIT is added to every generated entry. The results are
  /// appended to the four output arrays.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      bool IsFirstComponentList, bool IsImplicit) const {

    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), noflags
    //
    // map(i)
    // &i, &i, 100*sizeof(int), noflags
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), noflags
    //
    // map(p)
    // &p, &p, sizeof(float*), noflags
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), noflags
    //
    // map(s)
    // &s, &s, sizeof(S2), noflags
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), noflags
    //
    // map(s.s.f)
    // &s, &(s.s.f), 50*sizeof(float), noflags
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), noflags
    //
    // map(s.p[:22])
    // &s, &(s.p), sizeof(double*), noflags
    // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    //
    // map(s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag
    //
    // map(s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag
    //
    // map(s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), noflags
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), noflags
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), noflags
    //
    // map(ps->p)
    // ps, &(ps->p), sizeof(double*), noflags
    //
    // map(ps->p[:22])
    // ps, &(ps->p), sizeof(double*), noflags
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    //
    // map(ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag
    //
    // map(ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag
    //
    // map(ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    bool IsLink = false; // Is this variable a "declare target link"?

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    llvm::Value *BP = nullptr;

    if (const auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.EmitScalarExpr(ME->getBase());
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        // For a 'declare target link' variable the base pointer is replaced
        // by the address returned by getAddrOfDeclareTargetLink.
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                isDeclareTargetDeclaration(VD))
          if (*Res == OMPDeclareTargetDeclAttr::MT_Link) {
            IsLink = true;
            BP = CGF.CGM.getOpenMPRuntime()
                     .getAddrOfDeclareTargetLink(VD)
                     .getPointer();
          }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        LValue PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty);
        BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(),
                                         Ty->castAs<PointerType>())
                 .getPointer();

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Flags added to every entry generated below.
    uint64_t DefaultFlags = IsImplicit ? OMP_MAP_IMPLICIT : 0;
    for (; I != CE; ++I) {
      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE &&
           OMPArraySectionExpr::getBaseOriginalType(OASE)
               .getCanonicalType()
               ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      if (Next == CE || IsPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        llvm::Value *LB =
            CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer();
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());

        // If we have a member expression and the current component is a
        // reference, we have to map the reference too. Whenever we have a
        // reference, the section that reference refers to is going to be a
        // load instruction from the storage assigned to the reference.
        if (isa<MemberExpr>(I->getAssociatedExpression()) &&
            I->getAssociatedDeclaration()->getType()->isReferenceType()) {
          auto *LI = cast<llvm::LoadInst>(LB);
          llvm::Value *RefAddr = LI->getPointerOperand();

          // Extra 'alloc' entry for the storage of the reference itself.
          BasePointers.push_back(BP);
          Pointers.push_back(RefAddr);
          Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
          Types.push_back(DefaultFlags |
                          getMapTypeBits(
                              /*MapType*/ OMPC_MAP_alloc,
                              /*MapTypeModifier=*/OMPC_MAP_unknown,
                              !IsExpressionFirstInfo, IsCaptureFirstInfo));
          IsExpressionFirstInfo = false;
          IsCaptureFirstInfo = false;
          // The reference will be the next base address.
          BP = RefAddr;
        }

        BasePointers.push_back(BP);
        Pointers.push_back(LB);
        Sizes.push_back(Size);

        // We need to add a pointer flag for each map that comes from the
        // same expression except for the first one. We also need to signal
        // this map is the first one that relates with the current capture
        // (there is a set of entries for each capture). For a 'declare target
        // link' variable the pointer flag is always added and the target
        // parameter flag is never added.
        Types.push_back(DefaultFlags |
                        getMapTypeBits(MapType, MapTypeModifier,
                                       !IsExpressionFirstInfo || IsLink,
                                       IsCaptureFirstInfo && !IsLink));

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }

  /// \brief Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions. Also, for each item that
  /// relates with a device pointer, a pair of the relevant declaration and
  /// index where it occurs is appended to the device pointers info array.
  /// The four output arrays are cleared before being filled.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    BasePointers.clear();
    Pointers.clear();
    Sizes.clear();
    Types.clear();

    /// One component list together with the map type, modifier and
    /// device-pointer handling it was requested with.
    struct MapInfo {
      /// Kind that defines how a device pointer has to be returned.
      enum ReturnPointerKind {
        // Don't have to return any pointer.
        RPK_None,
        // Pointer is the base of the declaration.
        RPK_Base,
        // Pointer is a member of the base declaration - 'this'
        RPK_Member,
        // Pointer is a reference and a member of the base declaration - 'this'
        RPK_MemberReference,
      };
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
      OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown;
      ReturnPointerKind ReturnDevicePointer = RPK_None;
      bool IsImplicit = false;

      MapInfo() = default;
      MapInfo(
          OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
          OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
          ReturnPointerKind ReturnDevicePointer, bool IsImplicit)
          : Components(Components), MapType(MapType),
            MapTypeModifier(MapTypeModifier),
            ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
    };

    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. Entries are keyed by the canonical declaration (or null when
    // there is no declaration).
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
        MapInfo::ReturnPointerKind ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer,
                            IsImplicit);
    };

    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
                MapInfo::RPK_None, C->isImplicit());
      }
    // 'to' clauses are recorded with map type 'to'.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
                MapInfo::RPK_None, C->isImplicit());
      }
    // 'from' clauses are recorded with map type 'from'.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
                MapInfo::RPK_None, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>())
      for (const auto &L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = isa<MemberExpr>(IE)
                                          ? (VD->getType()->isReferenceType()
                                                 ? MapInfo::RPK_MemberReference
                                                 : MapInfo::RPK_Member)
                                          : MapInfo::RPK_Base;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section.
        // FIXME: MSVC 2013 seems to require this-> to find member CGF.
        llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(IE),
                                                      IE->getExprLoc());
        BasePointers.push_back({Ptr, VD});
        Pointers.push_back(Ptr);
        Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
        Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
      }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;
      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = BasePointers.size();
        // FIXME: MSVC 2013 seems to require this-> to find the member method.
        this->generateInfoForComponentList(
            L.MapType, L.MapTypeModifier, L.Components, BasePointers, Pointers,
            Sizes, Types, IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (IsFirstComponentList &&
            L.ReturnDevicePointer != MapInfo::RPK_None) {
          // If the pointer is not the base of the map, we need to skip the
          // base. If it is a reference in a member field, we also need to skip
          // the map of the reference.
          if (L.ReturnDevicePointer != MapInfo::RPK_Base) {
            ++CurrentBasePointersIdx;
            if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference)
              ++CurrentBasePointersIdx;
          }
          assert(BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }
    }
  }

  /// \brief Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture. The four output arrays are cleared first;
  /// they stay empty when no is_device_ptr or map clause refers to the
  /// captured declaration. \a Arg is the value used for the entry generated
  /// for a captured declaration that appears in an is_device_ptr clause.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes,
                              MapFlagsArrayTy &Types) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    BasePointers.clear();
    Pointers.clear();
    Sizes.clear();
    Types.clear();

    // We need to know when we are generating information for the first
    // component associated with a capture, because the mapping flags depend on
    // it.
    bool IsFirstComponentList = true;

    // A capture of 'this' is represented by a null declaration.
    const ValueDecl *VD =
        Cap->capturesThis()
            ? nullptr
            : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value, otherwise, if it is a member expression, we need to map
    // 'to' the field.
    if (!VD) {
      auto It = DevPointersMap.find(VD);
      if (It != DevPointersMap.end()) {
        for (ArrayRef<OMPClauseMappableExprCommon::MappableComponent> L :
             It->second) {
          generateInfoForComponentList(
              /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L,
              BasePointers, Pointers, Sizes, Types, IsFirstComponentList,
              /*IsImplicit=*/false);
          IsFirstComponentList = false;
        }
        return;
      }
    } else if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
      for (const auto &L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        generateInfoForComponentList(
            C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers,
            Pointers, Sizes, Types, IsFirstComponentList, C->isImplicit());
        IsFirstComponentList = false;
      }

    return;
  }

  /// \brief Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV. Exactly one
  /// entry is appended to each output array, and the entry is always flagged
  /// as a target parameter.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) {

    // Do the default mapping.
    if (CI.capturesThis()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      // The size is that of the type the field's pointer type points to.
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.getTypeSize(RI.getType()));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(0u);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
      }
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);

      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.getTypeSize(ElementType));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'. The default is overridden when the
      // captured variable is known as first-private or reduction.
      CurMapTypes.emplace_back(adjustMapModifiersForPrivateClauses(
          CI, ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM)
                                             : OMP_MAP_TO));
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
  }
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// \brief Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
7228 OMP_DEVICEID_UNDEF = -1, 7229 }; 7230 } // anonymous namespace 7231 7232 /// \brief Emit the arrays used to pass the captures and map information to the 7233 /// offloading runtime library. If there is no map or capture information, 7234 /// return nullptr by reference. 7235 static void 7236 emitOffloadingArrays(CodeGenFunction &CGF, 7237 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 7238 MappableExprsHandler::MapValuesArrayTy &Pointers, 7239 MappableExprsHandler::MapValuesArrayTy &Sizes, 7240 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 7241 CGOpenMPRuntime::TargetDataInfo &Info) { 7242 CodeGenModule &CGM = CGF.CGM; 7243 ASTContext &Ctx = CGF.getContext(); 7244 7245 // Reset the array information. 7246 Info.clearArrayInfo(); 7247 Info.NumberOfPtrs = BasePointers.size(); 7248 7249 if (Info.NumberOfPtrs) { 7250 // Detect if we have any capture size requiring runtime evaluation of the 7251 // size so that a constant array could be eventually used. 7252 bool hasRuntimeEvaluationCaptureSize = false; 7253 for (llvm::Value *S : Sizes) 7254 if (!isa<llvm::Constant>(S)) { 7255 hasRuntimeEvaluationCaptureSize = true; 7256 break; 7257 } 7258 7259 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 7260 QualType PointerArrayType = 7261 Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, 7262 /*IndexTypeQuals=*/0); 7263 7264 Info.BasePointersArray = 7265 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 7266 Info.PointersArray = 7267 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 7268 7269 // If we don't have any VLA types or other types that require runtime 7270 // evaluation, we can use a constant array for the map sizes, otherwise we 7271 // need to fill up the arrays as we do for the pointers. 
7272 if (hasRuntimeEvaluationCaptureSize) { 7273 QualType SizeArrayType = Ctx.getConstantArrayType( 7274 Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, 7275 /*IndexTypeQuals=*/0); 7276 Info.SizesArray = 7277 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 7278 } else { 7279 // We expect all the sizes to be constant, so we collect them to create 7280 // a constant array. 7281 SmallVector<llvm::Constant *, 16> ConstSizes; 7282 for (llvm::Value *S : Sizes) 7283 ConstSizes.push_back(cast<llvm::Constant>(S)); 7284 7285 auto *SizesArrayInit = llvm::ConstantArray::get( 7286 llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); 7287 auto *SizesArrayGbl = new llvm::GlobalVariable( 7288 CGM.getModule(), SizesArrayInit->getType(), 7289 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 7290 SizesArrayInit, ".offload_sizes"); 7291 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 7292 Info.SizesArray = SizesArrayGbl; 7293 } 7294 7295 // The map types are always constant so we don't need to generate code to 7296 // fill arrays. Instead, we create an array constant. 
7297 llvm::Constant *MapTypesArrayInit = 7298 llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); 7299 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 7300 CGM.getModule(), MapTypesArrayInit->getType(), 7301 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 7302 MapTypesArrayInit, ".offload_maptypes"); 7303 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 7304 Info.MapTypesArray = MapTypesArrayGbl; 7305 7306 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 7307 llvm::Value *BPVal = *BasePointers[I]; 7308 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 7309 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 7310 Info.BasePointersArray, 0, I); 7311 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 7312 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 7313 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 7314 CGF.Builder.CreateStore(BPVal, BPAddr); 7315 7316 if (Info.requiresDevicePointerInfo()) 7317 if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl()) 7318 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 7319 7320 llvm::Value *PVal = Pointers[I]; 7321 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 7322 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 7323 Info.PointersArray, 0, I); 7324 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 7325 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 7326 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 7327 CGF.Builder.CreateStore(PVal, PAddr); 7328 7329 if (hasRuntimeEvaluationCaptureSize) { 7330 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 7331 llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), 7332 Info.SizesArray, 7333 /*Idx0=*/0, 7334 /*Idx1=*/I); 7335 Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); 7336 CGF.Builder.CreateStore( 7337 CGF.Builder.CreateIntCast(Sizes[I], CGM.SizeTy, /*isSigned=*/true), 7338 SAddr); 7339 } 7340 } 7341 } 7342 } 7343 /// \brief Emit 
the arguments to be passed to the runtime library based on the 7344 /// arrays of pointers, sizes and map types. 7345 static void emitOffloadingArraysArgument( 7346 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 7347 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 7348 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 7349 CodeGenModule &CGM = CGF.CGM; 7350 if (Info.NumberOfPtrs) { 7351 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 7352 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 7353 Info.BasePointersArray, 7354 /*Idx0=*/0, /*Idx1=*/0); 7355 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 7356 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 7357 Info.PointersArray, 7358 /*Idx0=*/0, 7359 /*Idx1=*/0); 7360 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 7361 llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray, 7362 /*Idx0=*/0, /*Idx1=*/0); 7363 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 7364 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 7365 Info.MapTypesArray, 7366 /*Idx0=*/0, 7367 /*Idx1=*/0); 7368 } else { 7369 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 7370 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 7371 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); 7372 MapTypesArrayArg = 7373 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 7374 } 7375 } 7376 7377 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, 7378 const OMPExecutableDirective &D, 7379 llvm::Value *OutlinedFn, 7380 llvm::Value *OutlinedFnID, 7381 const Expr *IfCond, const Expr *Device) { 7382 if (!CGF.HaveInsertPoint()) 7383 return; 7384 7385 assert(OutlinedFn && "Invalid outlined function!"); 7386 7387 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 7388 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 7389 const CapturedStmt &CS = 
*D.getCapturedStmt(OMPD_target); 7390 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 7391 PrePostActionTy &) { 7392 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 7393 }; 7394 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 7395 7396 CodeGenFunction::OMPTargetDataInfo InputInfo; 7397 llvm::Value *MapTypesArray = nullptr; 7398 // Fill up the pointer arrays and transfer execution to the device. 7399 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 7400 &MapTypesArray, &CS, RequiresOuterTask, 7401 &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) { 7402 // On top of the arrays that were filled up, the target offloading call 7403 // takes as arguments the device id as well as the host pointer. The host 7404 // pointer is used by the runtime library to identify the current target 7405 // region, so it only has to be unique and not necessarily point to 7406 // anything. It could be the pointer to the outlined function that 7407 // implements the target region, but we aren't using that so that the 7408 // compiler doesn't need to keep that, and could therefore inline the host 7409 // function if proven worthwhile during optimization. 7410 7411 // From this point on, we need to have an ID of the target region defined. 7412 assert(OutlinedFnID && "Invalid outlined function ID!"); 7413 7414 // Emit device ID if any. 7415 llvm::Value *DeviceID; 7416 if (Device) { 7417 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 7418 CGF.Int64Ty, /*isSigned=*/true); 7419 } else { 7420 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 7421 } 7422 7423 // Emit the number of elements in the offloading arrays. 7424 llvm::Value *PointerNum = 7425 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 7426 7427 // Return value of the runtime offloading call. 
7428 llvm::Value *Return; 7429 7430 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(*this, CGF, D); 7431 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(*this, CGF, D); 7432 7433 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 7434 // The target region is an outlined function launched by the runtime 7435 // via calls __tgt_target() or __tgt_target_teams(). 7436 // 7437 // __tgt_target() launches a target region with one team and one thread, 7438 // executing a serial region. This master thread may in turn launch 7439 // more threads within its team upon encountering a parallel region, 7440 // however, no additional teams can be launched on the device. 7441 // 7442 // __tgt_target_teams() launches a target region with one or more teams, 7443 // each with one or more threads. This call is required for target 7444 // constructs such as: 7445 // 'target teams' 7446 // 'target' / 'teams' 7447 // 'target teams distribute parallel for' 7448 // 'target parallel' 7449 // and so on. 7450 // 7451 // Note that on the host and CPU targets, the runtime implementation of 7452 // these calls simply call the outlined function without forking threads. 7453 // The outlined functions themselves have runtime calls to 7454 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 7455 // the compiler in emitTeamsCall() and emitParallelCall(). 7456 // 7457 // In contrast, on the NVPTX target, the implementation of 7458 // __tgt_target_teams() launches a GPU kernel with the requested number 7459 // of teams and threads so no additional calls to the runtime are required. 7460 if (NumTeams) { 7461 // If we have NumTeams defined this means that we have an enclosed teams 7462 // region. Therefore we also expect to have NumThreads defined. These two 7463 // values should be defined in the presence of a teams directive, 7464 // regardless of having any clauses associated. 
If the user is using teams 7465 // but no clauses, these two values will be the default that should be 7466 // passed to the runtime library - a 32-bit integer with the value zero. 7467 assert(NumThreads && "Thread limit expression should be available along " 7468 "with number of teams."); 7469 llvm::Value *OffloadingArgs[] = {DeviceID, 7470 OutlinedFnID, 7471 PointerNum, 7472 InputInfo.BasePointersArray.getPointer(), 7473 InputInfo.PointersArray.getPointer(), 7474 InputInfo.SizesArray.getPointer(), 7475 MapTypesArray, 7476 NumTeams, 7477 NumThreads}; 7478 Return = CGF.EmitRuntimeCall( 7479 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait 7480 : OMPRTL__tgt_target_teams), 7481 OffloadingArgs); 7482 } else { 7483 llvm::Value *OffloadingArgs[] = {DeviceID, 7484 OutlinedFnID, 7485 PointerNum, 7486 InputInfo.BasePointersArray.getPointer(), 7487 InputInfo.PointersArray.getPointer(), 7488 InputInfo.SizesArray.getPointer(), 7489 MapTypesArray}; 7490 Return = CGF.EmitRuntimeCall( 7491 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait 7492 : OMPRTL__tgt_target), 7493 OffloadingArgs); 7494 } 7495 7496 // Check the error code and execute the host version if required. 7497 llvm::BasicBlock *OffloadFailedBlock = 7498 CGF.createBasicBlock("omp_offload.failed"); 7499 llvm::BasicBlock *OffloadContBlock = 7500 CGF.createBasicBlock("omp_offload.cont"); 7501 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 7502 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 7503 7504 CGF.EmitBlock(OffloadFailedBlock); 7505 if (RequiresOuterTask) { 7506 CapturedVars.clear(); 7507 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 7508 } 7509 emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars); 7510 CGF.EmitBranch(OffloadContBlock); 7511 7512 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 7513 }; 7514 7515 // Notify that the host version must be executed. 
7516 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 7517 RequiresOuterTask](CodeGenFunction &CGF, 7518 PrePostActionTy &) { 7519 if (RequiresOuterTask) { 7520 CapturedVars.clear(); 7521 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 7522 } 7523 emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars); 7524 }; 7525 7526 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 7527 &CapturedVars, RequiresOuterTask, 7528 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 7529 // Fill up the arrays with all the captured variables. 7530 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 7531 MappableExprsHandler::MapValuesArrayTy Pointers; 7532 MappableExprsHandler::MapValuesArrayTy Sizes; 7533 MappableExprsHandler::MapFlagsArrayTy MapTypes; 7534 7535 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 7536 MappableExprsHandler::MapValuesArrayTy CurPointers; 7537 MappableExprsHandler::MapValuesArrayTy CurSizes; 7538 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 7539 7540 // Get mappable expression information. 7541 MappableExprsHandler MEHandler(D, CGF); 7542 7543 auto RI = CS.getCapturedRecordDecl()->field_begin(); 7544 auto CV = CapturedVars.begin(); 7545 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 7546 CE = CS.capture_end(); 7547 CI != CE; ++CI, ++RI, ++CV) { 7548 CurBasePointers.clear(); 7549 CurPointers.clear(); 7550 CurSizes.clear(); 7551 CurMapTypes.clear(); 7552 7553 // VLA sizes are passed to the outlined region by copy and do not have map 7554 // information associated. 7555 if (CI->capturesVariableArrayType()) { 7556 CurBasePointers.push_back(*CV); 7557 CurPointers.push_back(*CV); 7558 CurSizes.push_back(CGF.getTypeSize(RI->getType())); 7559 // Copy to the device as an argument. No need to retrieve it. 
7560 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 7561 MappableExprsHandler::OMP_MAP_TARGET_PARAM); 7562 } else { 7563 // If we have any information in the map clause, we use it, otherwise we 7564 // just do a default mapping. 7565 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, 7566 CurSizes, CurMapTypes); 7567 if (CurBasePointers.empty()) 7568 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, 7569 CurPointers, CurSizes, CurMapTypes); 7570 } 7571 // We expect to have at least an element of information for this capture. 7572 assert(!CurBasePointers.empty() && 7573 "Non-existing map pointer for capture!"); 7574 assert(CurBasePointers.size() == CurPointers.size() && 7575 CurBasePointers.size() == CurSizes.size() && 7576 CurBasePointers.size() == CurMapTypes.size() && 7577 "Inconsistent map information sizes!"); 7578 7579 // We need to append the results of this capture to what we already have. 7580 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 7581 Pointers.append(CurPointers.begin(), CurPointers.end()); 7582 Sizes.append(CurSizes.begin(), CurSizes.end()); 7583 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); 7584 } 7585 // Map other list items in the map clause which are not captured variables 7586 // but "declare target link" global variables. 
7587 for (const auto *C : D.getClausesOfKind<OMPMapClause>()) { 7588 for (const auto &L : C->component_lists()) { 7589 if (!L.first) 7590 continue; 7591 const auto *VD = dyn_cast<VarDecl>(L.first); 7592 if (!VD) 7593 continue; 7594 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7595 isDeclareTargetDeclaration(VD); 7596 if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) 7597 continue; 7598 MEHandler.generateInfoForComponentList( 7599 C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers, 7600 Pointers, Sizes, MapTypes, /*IsFirstComponentList=*/true, 7601 C->isImplicit()); 7602 } 7603 } 7604 7605 TargetDataInfo Info; 7606 // Fill up the arrays and create the arguments. 7607 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 7608 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 7609 Info.PointersArray, Info.SizesArray, 7610 Info.MapTypesArray, Info); 7611 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 7612 InputInfo.BasePointersArray = 7613 Address(Info.BasePointersArray, CGM.getPointerAlign()); 7614 InputInfo.PointersArray = 7615 Address(Info.PointersArray, CGM.getPointerAlign()); 7616 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 7617 MapTypesArray = Info.MapTypesArray; 7618 if (RequiresOuterTask) 7619 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 7620 else 7621 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 7622 }; 7623 7624 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 7625 CodeGenFunction &CGF, PrePostActionTy &) { 7626 if (RequiresOuterTask) { 7627 CodeGenFunction::OMPTargetDataInfo InputInfo; 7628 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 7629 } else { 7630 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 7631 } 7632 }; 7633 7634 // If we have a target function ID it means that we need to support 7635 // offloading, otherwise, just execute on the host. 
We need to execute on host 7636 // regardless of the conditional in the if clause if, e.g., the user do not 7637 // specify target triples. 7638 if (OutlinedFnID) { 7639 if (IfCond) { 7640 emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 7641 } else { 7642 RegionCodeGenTy ThenRCG(TargetThenGen); 7643 ThenRCG(CGF); 7644 } 7645 } else { 7646 RegionCodeGenTy ElseRCG(TargetElseGen); 7647 ElseRCG(CGF); 7648 } 7649 } 7650 7651 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 7652 StringRef ParentName) { 7653 if (!S) 7654 return; 7655 7656 // Codegen OMP target directives that offload compute to the device. 7657 bool RequiresDeviceCodegen = 7658 isa<OMPExecutableDirective>(S) && 7659 isOpenMPTargetExecutionDirective( 7660 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 7661 7662 if (RequiresDeviceCodegen) { 7663 const auto &E = *cast<OMPExecutableDirective>(S); 7664 unsigned DeviceID; 7665 unsigned FileID; 7666 unsigned Line; 7667 getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID, 7668 FileID, Line); 7669 7670 // Is this a target region that should not be emitted as an entry point? If 7671 // so just signal we are done with this target region. 
7672 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 7673 ParentName, Line)) 7674 return; 7675 7676 switch (E.getDirectiveKind()) { 7677 case OMPD_target: 7678 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 7679 cast<OMPTargetDirective>(E)); 7680 break; 7681 case OMPD_target_parallel: 7682 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 7683 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 7684 break; 7685 case OMPD_target_teams: 7686 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 7687 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 7688 break; 7689 case OMPD_target_teams_distribute: 7690 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 7691 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 7692 break; 7693 case OMPD_target_teams_distribute_simd: 7694 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 7695 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 7696 break; 7697 case OMPD_target_parallel_for: 7698 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 7699 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 7700 break; 7701 case OMPD_target_parallel_for_simd: 7702 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 7703 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 7704 break; 7705 case OMPD_target_simd: 7706 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 7707 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 7708 break; 7709 case OMPD_target_teams_distribute_parallel_for: 7710 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 7711 CGM, ParentName, 7712 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 7713 break; 7714 case OMPD_target_teams_distribute_parallel_for_simd: 7715 CodeGenFunction:: 7716 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 7717 CGM, ParentName, 7718 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 7719 break; 7720 case OMPD_parallel: 
7721 case OMPD_for: 7722 case OMPD_parallel_for: 7723 case OMPD_parallel_sections: 7724 case OMPD_for_simd: 7725 case OMPD_parallel_for_simd: 7726 case OMPD_cancel: 7727 case OMPD_cancellation_point: 7728 case OMPD_ordered: 7729 case OMPD_threadprivate: 7730 case OMPD_task: 7731 case OMPD_simd: 7732 case OMPD_sections: 7733 case OMPD_section: 7734 case OMPD_single: 7735 case OMPD_master: 7736 case OMPD_critical: 7737 case OMPD_taskyield: 7738 case OMPD_barrier: 7739 case OMPD_taskwait: 7740 case OMPD_taskgroup: 7741 case OMPD_atomic: 7742 case OMPD_flush: 7743 case OMPD_teams: 7744 case OMPD_target_data: 7745 case OMPD_target_exit_data: 7746 case OMPD_target_enter_data: 7747 case OMPD_distribute: 7748 case OMPD_distribute_simd: 7749 case OMPD_distribute_parallel_for: 7750 case OMPD_distribute_parallel_for_simd: 7751 case OMPD_teams_distribute: 7752 case OMPD_teams_distribute_simd: 7753 case OMPD_teams_distribute_parallel_for: 7754 case OMPD_teams_distribute_parallel_for_simd: 7755 case OMPD_target_update: 7756 case OMPD_declare_simd: 7757 case OMPD_declare_target: 7758 case OMPD_end_declare_target: 7759 case OMPD_declare_reduction: 7760 case OMPD_taskloop: 7761 case OMPD_taskloop_simd: 7762 case OMPD_unknown: 7763 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 7764 } 7765 return; 7766 } 7767 7768 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 7769 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 7770 return; 7771 7772 scanForTargetRegionsFunctions( 7773 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 7774 return; 7775 } 7776 7777 // If this is a lambda function, look into its body. 7778 if (const auto *L = dyn_cast<LambdaExpr>(S)) 7779 S = L->getBody(); 7780 7781 // Keep looking for target regions recursively. 
7782 for (const Stmt *II : S->children()) 7783 scanForTargetRegionsFunctions(II, ParentName); 7784 } 7785 7786 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 7787 const auto *FD = cast<FunctionDecl>(GD.getDecl()); 7788 7789 // If emitting code for the host, we do not process FD here. Instead we do 7790 // the normal code generation. 7791 if (!CGM.getLangOpts().OpenMPIsDevice) 7792 return false; 7793 7794 // Try to detect target regions in the function. 7795 scanForTargetRegionsFunctions(FD->getBody(), CGM.getMangledName(GD)); 7796 7797 // Do not to emit function if it is not marked as declare target. 7798 return !isDeclareTargetDeclaration(FD); 7799 } 7800 7801 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 7802 if (!CGM.getLangOpts().OpenMPIsDevice) 7803 return false; 7804 7805 // Check if there are Ctors/Dtors in this declaration and look for target 7806 // regions in it. We use the complete variant to produce the kernel name 7807 // mangling. 7808 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 7809 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 7810 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 7811 StringRef ParentName = 7812 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 7813 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 7814 } 7815 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 7816 StringRef ParentName = 7817 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 7818 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 7819 } 7820 } 7821 7822 // Do not to emit variable if it is not marked as declare target. 
7823 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7824 isDeclareTargetDeclaration(cast<VarDecl>(GD.getDecl())); 7825 return !Res || *Res == OMPDeclareTargetDeclAttr::MT_Link; 7826 } 7827 7828 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 7829 llvm::Constant *Addr) { 7830 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7831 isDeclareTargetDeclaration(VD)) { 7832 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 7833 StringRef VarName; 7834 CharUnits VarSize; 7835 llvm::GlobalValue::LinkageTypes Linkage; 7836 switch (*Res) { 7837 case OMPDeclareTargetDeclAttr::MT_To: 7838 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 7839 VarName = CGM.getMangledName(VD); 7840 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 7841 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 7842 break; 7843 case OMPDeclareTargetDeclAttr::MT_Link: 7844 // Map type 'to' because we do not map the original variable but the 7845 // reference. 
7846 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 7847 if (!CGM.getLangOpts().OpenMPIsDevice) { 7848 Addr = 7849 cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer()); 7850 } 7851 VarName = Addr->getName(); 7852 VarSize = CGM.getPointerSize(); 7853 Linkage = llvm::GlobalValue::WeakAnyLinkage; 7854 break; 7855 } 7856 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 7857 VarName, Addr, VarSize, Flags, Linkage); 7858 } 7859 } 7860 7861 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 7862 if (isa<FunctionDecl>(GD.getDecl())) 7863 return emitTargetFunctions(GD); 7864 7865 return emitTargetGlobalVariable(GD); 7866 } 7867 7868 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 7869 CodeGenModule &CGM) 7870 : CGM(CGM) { 7871 if (CGM.getLangOpts().OpenMPIsDevice) { 7872 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 7873 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 7874 } 7875 } 7876 7877 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 7878 if (CGM.getLangOpts().OpenMPIsDevice) 7879 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 7880 } 7881 7882 bool CGOpenMPRuntime::markAsGlobalTarget(const FunctionDecl *D) { 7883 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 7884 return true; 7885 7886 const FunctionDecl *FD = D->getCanonicalDecl(); 7887 // Do not to emit function if it is marked as declare target as it was already 7888 // emitted. 7889 if (isDeclareTargetDeclaration(D)) { 7890 if (D->hasBody() && AlreadyEmittedTargetFunctions.count(FD) == 0) { 7891 if (auto *F = dyn_cast_or_null<llvm::Function>( 7892 CGM.GetGlobalValue(CGM.getMangledName(D)))) 7893 return !F->isDeclaration(); 7894 return false; 7895 } 7896 return true; 7897 } 7898 7899 // Do not mark member functions except for static. 
7900 if (const auto *Method = dyn_cast<CXXMethodDecl>(FD)) 7901 if (!Method->isStatic()) 7902 return true; 7903 7904 return !AlreadyEmittedTargetFunctions.insert(FD).second; 7905 } 7906 7907 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { 7908 // If we have offloading in the current module, we need to emit the entries 7909 // now and register the offloading descriptor. 7910 createOffloadEntriesAndInfoMetadata(); 7911 7912 // Create and register the offloading binary descriptors. This is the main 7913 // entity that captures all the information about offloading in the current 7914 // compilation unit. 7915 return createOffloadingBinaryDescriptorRegistration(); 7916 } 7917 7918 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 7919 const OMPExecutableDirective &D, 7920 SourceLocation Loc, 7921 llvm::Value *OutlinedFn, 7922 ArrayRef<llvm::Value *> CapturedVars) { 7923 if (!CGF.HaveInsertPoint()) 7924 return; 7925 7926 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 7927 CodeGenFunction::RunCleanupsScope Scope(CGF); 7928 7929 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 7930 llvm::Value *Args[] = { 7931 RTLoc, 7932 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 7933 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 7934 llvm::SmallVector<llvm::Value *, 16> RealArgs; 7935 RealArgs.append(std::begin(Args), std::end(Args)); 7936 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 7937 7938 llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 7939 CGF.EmitRuntimeCall(RTLFn, RealArgs); 7940 } 7941 7942 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 7943 const Expr *NumTeams, 7944 const Expr *ThreadLimit, 7945 SourceLocation Loc) { 7946 if (!CGF.HaveInsertPoint()) 7947 return; 7948 7949 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 7950 7951 llvm::Value *NumTeamsVal = 7952 NumTeams 7953 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

/// Emit a structured target data region: a __tgt_target_data_begin call, the
/// region body produced by \p CodeGen, and a matching __tgt_target_data_end
/// call.
///
/// \param CGF Function being generated; nothing is emitted when it has no
///        insertion point.
/// \param D Directive whose map clauses are gathered through
///        MappableExprsHandler.
/// \param IfCond Optional 'if' clause condition. When present, the begin and
///        end calls are each wrapped in their own emitOMPIfClause().
/// \param Device Optional device expression; OMP_DEVICEID_UNDEF is passed to
///        the runtime when it is null.
/// \param CodeGen Generator for the region body.
/// \param Info Filled in by emitOffloadingArrays() while opening the region
///        and read again when closing it.
///
/// Where the body is emitted depends on Info.CaptureDeviceAddrMap: if it is
/// non-empty the body is emitted inside both the 'then' and the 'else' branch
/// of the opening conditional (the 'else' copy with NoPrivAction installed);
/// if it is empty the body is emitted once, between the two runtime calls.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any. Note that the device expression is evaluated
    // again here; it is not reused from the opening call.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

/// Emit the runtime call for a standalone data directive ('target enter data',
/// 'target exit data' or 'target update'; anything else trips the assertion
/// below).
///
/// \param CGF Function being generated; nothing is emitted when it has no
///        insertion point.
/// \param D The standalone directive. Its 'nowait' clause selects the
///        *_nowait runtime entry points and its 'depend' clause selects
///        task-based emission.
/// \param IfCond Optional 'if' clause condition; nothing is emitted on the
///        false branch.
/// \param Device Optional device expression; OMP_DEVICEID_UNDEF is passed to
///        the runtime when it is null.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // Both are filled in by TargetThenGen before ThenGen runs, and are captured
  // by reference so ThenGen sees the final values.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the runtime call that implements the standalone directive.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ?
OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    // Every remaining directive kind is spelled out (there is no 'default:')
    // and none of them is a valid standalone data directive.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  // Builds the offloading arrays from the directive's map clauses, publishes
  // them through InputInfo / MapTypesArray, and then runs ThenGen either as a
  // task-based directive (when a 'depend' clause is present) or inlined.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Hand the array pointers to ThenGen; each is wrapped as an Address with
    // pointer alignment.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    // The 'else' generator is intentionally empty: nothing is emitted when the
    // condition is false.
    emitOMPIfClause(CGF, IfCond, TargetThenGen,
                    [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
8251 struct ParamAttrTy { 8252 ParamKindTy Kind = Vector; 8253 llvm::APSInt StrideOrArg; 8254 llvm::APSInt Alignment; 8255 }; 8256 } // namespace 8257 8258 static unsigned evaluateCDTSize(const FunctionDecl *FD, 8259 ArrayRef<ParamAttrTy> ParamAttrs) { 8260 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 8261 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 8262 // of that clause. The VLEN value must be power of 2. 8263 // In other case the notion of the function`s "characteristic data type" (CDT) 8264 // is used to compute the vector length. 8265 // CDT is defined in the following order: 8266 // a) For non-void function, the CDT is the return type. 8267 // b) If the function has any non-uniform, non-linear parameters, then the 8268 // CDT is the type of the first such parameter. 8269 // c) If the CDT determined by a) or b) above is struct, union, or class 8270 // type which is pass-by-value (except for the type that maps to the 8271 // built-in complex data type), the characteristic data type is int. 8272 // d) If none of the above three cases is applicable, the CDT is int. 8273 // The VLEN is then determined based on the CDT and the size of vector 8274 // register of that ISA for which current vector version is generated. The 8275 // VLEN is computed using the formula below: 8276 // VLEN = sizeof(vector_register) / sizeof(CDT), 8277 // where vector register size specified in section 3.2.1 Registers and the 8278 // Stack Frame of original AMD64 ABI document. 
8279 QualType RetType = FD->getReturnType(); 8280 if (RetType.isNull()) 8281 return 0; 8282 ASTContext &C = FD->getASTContext(); 8283 QualType CDT; 8284 if (!RetType.isNull() && !RetType->isVoidType()) { 8285 CDT = RetType; 8286 } else { 8287 unsigned Offset = 0; 8288 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 8289 if (ParamAttrs[Offset].Kind == Vector) 8290 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 8291 ++Offset; 8292 } 8293 if (CDT.isNull()) { 8294 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 8295 if (ParamAttrs[I + Offset].Kind == Vector) { 8296 CDT = FD->getParamDecl(I)->getType(); 8297 break; 8298 } 8299 } 8300 } 8301 } 8302 if (CDT.isNull()) 8303 CDT = C.IntTy; 8304 CDT = CDT->getCanonicalTypeUnqualified(); 8305 if (CDT->isRecordType() || CDT->isUnionType()) 8306 CDT = C.IntTy; 8307 return C.getTypeSize(CDT); 8308 } 8309 8310 static void 8311 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 8312 const llvm::APSInt &VLENVal, 8313 ArrayRef<ParamAttrTy> ParamAttrs, 8314 OMPDeclareSimdDeclAttr::BranchStateTy State) { 8315 struct ISADataTy { 8316 char ISA; 8317 unsigned VecRegSize; 8318 }; 8319 ISADataTy ISAData[] = { 8320 { 8321 'b', 128 8322 }, // SSE 8323 { 8324 'c', 256 8325 }, // AVX 8326 { 8327 'd', 256 8328 }, // AVX2 8329 { 8330 'e', 512 8331 }, // AVX512 8332 }; 8333 llvm::SmallVector<char, 2> Masked; 8334 switch (State) { 8335 case OMPDeclareSimdDeclAttr::BS_Undefined: 8336 Masked.push_back('N'); 8337 Masked.push_back('M'); 8338 break; 8339 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 8340 Masked.push_back('N'); 8341 break; 8342 case OMPDeclareSimdDeclAttr::BS_Inbranch: 8343 Masked.push_back('M'); 8344 break; 8345 } 8346 for (char Mask : Masked) { 8347 for (const ISADataTy &Data : ISAData) { 8348 SmallString<256> Buffer; 8349 llvm::raw_svector_ostream Out(Buffer); 8350 Out << "_ZGV" << Data.ISA << Mask; 8351 if (!VLENVal) { 8352 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / 8353 
evaluateCDTSize(FD, ParamAttrs)); 8354 } else { 8355 Out << VLENVal; 8356 } 8357 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 8358 switch (ParamAttr.Kind){ 8359 case LinearWithVarStride: 8360 Out << 's' << ParamAttr.StrideOrArg; 8361 break; 8362 case Linear: 8363 Out << 'l'; 8364 if (!!ParamAttr.StrideOrArg) 8365 Out << ParamAttr.StrideOrArg; 8366 break; 8367 case Uniform: 8368 Out << 'u'; 8369 break; 8370 case Vector: 8371 Out << 'v'; 8372 break; 8373 } 8374 if (!!ParamAttr.Alignment) 8375 Out << 'a' << ParamAttr.Alignment; 8376 } 8377 Out << '_' << Fn->getName(); 8378 Fn->addFnAttr(Out.str()); 8379 } 8380 } 8381 } 8382 8383 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 8384 llvm::Function *Fn) { 8385 ASTContext &C = CGM.getContext(); 8386 FD = FD->getMostRecentDecl(); 8387 // Map params to their positions in function decl. 8388 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 8389 if (isa<CXXMethodDecl>(FD)) 8390 ParamPositions.try_emplace(FD, 0); 8391 unsigned ParamPos = ParamPositions.size(); 8392 for (const ParmVarDecl *P : FD->parameters()) { 8393 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 8394 ++ParamPos; 8395 } 8396 while (FD) { 8397 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 8398 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 8399 // Mark uniform parameters. 8400 for (const Expr *E : Attr->uniforms()) { 8401 E = E->IgnoreParenImpCasts(); 8402 unsigned Pos; 8403 if (isa<CXXThisExpr>(E)) { 8404 Pos = ParamPositions[FD]; 8405 } else { 8406 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 8407 ->getCanonicalDecl(); 8408 Pos = ParamPositions[PVD]; 8409 } 8410 ParamAttrs[Pos].Kind = Uniform; 8411 } 8412 // Get alignment info. 
8413 auto NI = Attr->alignments_begin(); 8414 for (const Expr *E : Attr->aligneds()) { 8415 E = E->IgnoreParenImpCasts(); 8416 unsigned Pos; 8417 QualType ParmTy; 8418 if (isa<CXXThisExpr>(E)) { 8419 Pos = ParamPositions[FD]; 8420 ParmTy = E->getType(); 8421 } else { 8422 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 8423 ->getCanonicalDecl(); 8424 Pos = ParamPositions[PVD]; 8425 ParmTy = PVD->getType(); 8426 } 8427 ParamAttrs[Pos].Alignment = 8428 (*NI) 8429 ? (*NI)->EvaluateKnownConstInt(C) 8430 : llvm::APSInt::getUnsigned( 8431 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 8432 .getQuantity()); 8433 ++NI; 8434 } 8435 // Mark linear parameters. 8436 auto SI = Attr->steps_begin(); 8437 auto MI = Attr->modifiers_begin(); 8438 for (const Expr *E : Attr->linears()) { 8439 E = E->IgnoreParenImpCasts(); 8440 unsigned Pos; 8441 if (isa<CXXThisExpr>(E)) { 8442 Pos = ParamPositions[FD]; 8443 } else { 8444 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 8445 ->getCanonicalDecl(); 8446 Pos = ParamPositions[PVD]; 8447 } 8448 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 8449 ParamAttr.Kind = Linear; 8450 if (*SI) { 8451 if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C, 8452 Expr::SE_AllowSideEffects)) { 8453 if (const auto *DRE = 8454 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 8455 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 8456 ParamAttr.Kind = LinearWithVarStride; 8457 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 8458 ParamPositions[StridePVD->getCanonicalDecl()]); 8459 } 8460 } 8461 } 8462 } 8463 ++SI; 8464 ++MI; 8465 } 8466 llvm::APSInt VLENVal; 8467 if (const Expr *VLEN = Attr->getSimdlen()) 8468 VLENVal = VLEN->EvaluateKnownConstInt(C); 8469 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 8470 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 8471 CGM.getTriple().getArch() == llvm::Triple::x86_64) 8472 emitX86DeclareSimdFunction(FD, Fn, VLENVal, 
ParamAttrs, State); 8473 } 8474 FD = FD->getPreviousDecl(); 8475 } 8476 } 8477 8478 namespace { 8479 /// Cleanup action for doacross support. 8480 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 8481 public: 8482 static const int DoacrossFinArgs = 2; 8483 8484 private: 8485 llvm::Value *RTLFn; 8486 llvm::Value *Args[DoacrossFinArgs]; 8487 8488 public: 8489 DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs) 8490 : RTLFn(RTLFn) { 8491 assert(CallArgs.size() == DoacrossFinArgs); 8492 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 8493 } 8494 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 8495 if (!CGF.HaveInsertPoint()) 8496 return; 8497 CGF.EmitRuntimeCall(RTLFn, Args); 8498 } 8499 }; 8500 } // namespace 8501 8502 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 8503 const OMPLoopDirective &D) { 8504 if (!CGF.HaveInsertPoint()) 8505 return; 8506 8507 ASTContext &C = CGM.getContext(); 8508 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 8509 RecordDecl *RD; 8510 if (KmpDimTy.isNull()) { 8511 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 8512 // kmp_int64 lo; // lower 8513 // kmp_int64 up; // upper 8514 // kmp_int64 st; // stride 8515 // }; 8516 RD = C.buildImplicitRecord("kmp_dim"); 8517 RD->startDefinition(); 8518 addFieldToRecordDecl(C, RD, Int64Ty); 8519 addFieldToRecordDecl(C, RD, Int64Ty); 8520 addFieldToRecordDecl(C, RD, Int64Ty); 8521 RD->completeDefinition(); 8522 KmpDimTy = C.getRecordType(RD); 8523 } else { 8524 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 8525 } 8526 8527 Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims"); 8528 CGF.EmitNullInitialization(DimsAddr, KmpDimTy); 8529 enum { LowerFD = 0, UpperFD, StrideFD }; 8530 // Fill dims with data. 
LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy);
  // The lower bound is not stored explicitly; it keeps the zero written by the
  // null initialization of the temporary.
  // dims.upper = num_iterations;
  LValue UpperLVal =
      CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD));
  llvm::Value *NumIterVal = CGF.EmitScalarConversion(
      CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(),
      Int64Ty, D.getNumIterations()->getExprLoc());
  CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
  // dims.stride = 1;
  LValue StrideLVal =
      CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                        StrideLVal);

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  // num_dims is the constant 1: a single kmp_dim record is passed.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()),
                         getThreadID(CGF, D.getLocStart()),
                         llvm::ConstantInt::getSigned(CGM.Int32Ty, 1),
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             DimsAddr.getPointer(), CGM.VoidPtrTy)};

  llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register the matching __kmpc_doacross_fini(loc, gtid) call as a cleanup
  // for both normal and exceptional exits.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())};
  llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

/// Emit the runtime call for a doacross 'depend' clause:
/// __kmpc_doacross_post for depend(source), __kmpc_doacross_wait for
/// depend(sink). Any other dependency kind trips the assertion.
///
/// The clause's counter value is converted to a signed 64-bit integer and
/// stored in a temporary whose address is passed to the runtime.
/// NOTE(review): unlike the neighbouring emit* functions, this one does not
/// check CGF.HaveInsertPoint() first -- confirm that is intentional.
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  const Expr *CounterVal = C->getCounterValue();
  assert(CounterVal);
  llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal),
                                                 CounterVal->getType(), Int64Ty,
                                                 CounterVal->getExprLoc());
  Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr");
  CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()),
                         getThreadID(CGF, C->getLocStart()),
                         CntAddr.getPointer()};
  llvm::Value *RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

/// Emit a call to \p Callee with \p Args under an artificial debug location
/// derived from \p Loc (which must be valid).
///
/// If \p Callee is an llvm::Function known not to throw, the call is emitted
/// through EmitNounwindRuntimeCall(); otherwise through EmitRuntimeCall().
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::Value *Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  // DL is only held for its scope: the debug location applies to the calls
  // emitted below.
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee)) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

/// Emit a call to the outlined function \p OutlinedFn; this implementation
/// simply forwards to emitCall().
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

/// Return the address of the local variable for \p NativeParam.
/// \p TargetParam is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// This implementation always returns Address::invalid(); neither argument is
/// inspected.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  return Address::invalid();
}

/// Unsupported in SIMD-only mode; marked unreachable.
llvm::Value *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value
*CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  // Unsupported in SIMD-only mode; marked unreachable.
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Value *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Value *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Reductions are supported in SIMD-only mode only in their simple form:
/// asserts that Options.SimpleReduction is set, then forwards all arguments
/// unchanged to CGOpenMPRuntime::emitReduction().
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

/// Unsupported in SIMD-only mode; marked unreachable.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         llvm::Value *OutlinedFn,
                                         llvm::Value *OutlinedFnID,
                                         const Expr *IfCond, const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unlike most overrides in this class this one is callable: it does nothing
/// and always returns false.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

/// Callable in SIMD-only mode: no registration function is created and
/// nullptr is returned.
llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
  return nullptr;
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Value *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode; marked unreachable.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
