1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This provides a class for OpenMP runtime code generation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGOpenMPRuntime.h" 17 #include "CGRecordLayout.h" 18 #include "CodeGenFunction.h" 19 #include "clang/CodeGen/ConstantInitBuilder.h" 20 #include "clang/AST/Decl.h" 21 #include "clang/AST/StmtOpenMP.h" 22 #include "clang/Basic/BitmaskEnum.h" 23 #include "llvm/ADT/ArrayRef.h" 24 #include "llvm/Bitcode/BitcodeReader.h" 25 #include "llvm/IR/CallSite.h" 26 #include "llvm/IR/DerivedTypes.h" 27 #include "llvm/IR/GlobalValue.h" 28 #include "llvm/IR/Value.h" 29 #include "llvm/Support/Format.h" 30 #include "llvm/Support/raw_ostream.h" 31 #include <cassert> 32 33 using namespace clang; 34 using namespace CodeGen; 35 36 namespace { 37 /// Base class for handling code generation inside OpenMP regions. 38 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 39 public: 40 /// Kinds of OpenMP regions used in codegen. 41 enum CGOpenMPRegionKind { 42 /// Region with outlined function for standalone 'parallel' 43 /// directive. 44 ParallelOutlinedRegion, 45 /// Region with outlined function for standalone 'task' directive. 46 TaskOutlinedRegion, 47 /// Region for constructs that do not require function outlining, 48 /// like 'for', 'sections', 'atomic' etc. directives. 49 InlinedRegion, 50 /// Region with outlined function for standalone 'target' directive. 
51 TargetRegion, 52 }; 53 54 CGOpenMPRegionInfo(const CapturedStmt &CS, 55 const CGOpenMPRegionKind RegionKind, 56 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 57 bool HasCancel) 58 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 59 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 60 61 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 62 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 63 bool HasCancel) 64 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 65 Kind(Kind), HasCancel(HasCancel) {} 66 67 /// Get a variable or parameter for storing global thread id 68 /// inside OpenMP construct. 69 virtual const VarDecl *getThreadIDVariable() const = 0; 70 71 /// Emit the captured statement body. 72 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 73 74 /// Get an LValue for the current ThreadID variable. 75 /// \return LValue for thread id variable. This LValue always has type int32*. 76 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 77 78 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 79 80 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 81 82 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 83 84 bool hasCancel() const { return HasCancel; } 85 86 static bool classof(const CGCapturedStmtInfo *Info) { 87 return Info->getKind() == CR_OpenMP; 88 } 89 90 ~CGOpenMPRegionInfo() override = default; 91 92 protected: 93 CGOpenMPRegionKind RegionKind; 94 RegionCodeGenTy CodeGen; 95 OpenMPDirectiveKind Kind; 96 bool HasCancel; 97 }; 98 99 /// API for captured statement code generation in OpenMP constructs. 
100 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 101 public: 102 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 103 const RegionCodeGenTy &CodeGen, 104 OpenMPDirectiveKind Kind, bool HasCancel, 105 StringRef HelperName) 106 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 107 HasCancel), 108 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 109 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 110 } 111 112 /// Get a variable or parameter for storing global thread id 113 /// inside OpenMP construct. 114 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 115 116 /// Get the name of the capture helper. 117 StringRef getHelperName() const override { return HelperName; } 118 119 static bool classof(const CGCapturedStmtInfo *Info) { 120 return CGOpenMPRegionInfo::classof(Info) && 121 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 122 ParallelOutlinedRegion; 123 } 124 125 private: 126 /// A variable or parameter storing global thread id for OpenMP 127 /// constructs. 128 const VarDecl *ThreadIDVar; 129 StringRef HelperName; 130 }; 131 132 /// API for captured statement code generation in OpenMP constructs. 133 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 134 public: 135 class UntiedTaskActionTy final : public PrePostActionTy { 136 bool Untied; 137 const VarDecl *PartIDVar; 138 const RegionCodeGenTy UntiedCodeGen; 139 llvm::SwitchInst *UntiedSwitch = nullptr; 140 141 public: 142 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 143 const RegionCodeGenTy &UntiedCodeGen) 144 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 145 void Enter(CodeGenFunction &CGF) override { 146 if (Untied) { 147 // Emit task switching point. 
148 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 149 CGF.GetAddrOfLocalVar(PartIDVar), 150 PartIDVar->getType()->castAs<PointerType>()); 151 llvm::Value *Res = 152 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 153 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 154 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 155 CGF.EmitBlock(DoneBB); 156 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 157 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 158 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 159 CGF.Builder.GetInsertBlock()); 160 emitUntiedSwitch(CGF); 161 } 162 } 163 void emitUntiedSwitch(CodeGenFunction &CGF) const { 164 if (Untied) { 165 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 166 CGF.GetAddrOfLocalVar(PartIDVar), 167 PartIDVar->getType()->castAs<PointerType>()); 168 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 169 PartIdLVal); 170 UntiedCodeGen(CGF); 171 CodeGenFunction::JumpDest CurPoint = 172 CGF.getJumpDestInCurrentScope(".untied.next."); 173 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 174 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 175 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 176 CGF.Builder.GetInsertBlock()); 177 CGF.EmitBranchThroughCleanup(CurPoint); 178 CGF.EmitBlock(CurPoint.getBlock()); 179 } 180 } 181 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } 182 }; 183 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 184 const VarDecl *ThreadIDVar, 185 const RegionCodeGenTy &CodeGen, 186 OpenMPDirectiveKind Kind, bool HasCancel, 187 const UntiedTaskActionTy &Action) 188 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 189 ThreadIDVar(ThreadIDVar), Action(Action) { 190 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 191 } 192 193 /// Get a variable or parameter for storing global thread id 194 /// inside OpenMP construct. 
195 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 196 197 /// Get an LValue for the current ThreadID variable. 198 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 199 200 /// Get the name of the capture helper. 201 StringRef getHelperName() const override { return ".omp_outlined."; } 202 203 void emitUntiedSwitch(CodeGenFunction &CGF) override { 204 Action.emitUntiedSwitch(CGF); 205 } 206 207 static bool classof(const CGCapturedStmtInfo *Info) { 208 return CGOpenMPRegionInfo::classof(Info) && 209 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 210 TaskOutlinedRegion; 211 } 212 213 private: 214 /// A variable or parameter storing global thread id for OpenMP 215 /// constructs. 216 const VarDecl *ThreadIDVar; 217 /// Action for emitting code for untied tasks. 218 const UntiedTaskActionTy &Action; 219 }; 220 221 /// API for inlined captured statement code generation in OpenMP 222 /// constructs. 223 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 224 public: 225 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 226 const RegionCodeGenTy &CodeGen, 227 OpenMPDirectiveKind Kind, bool HasCancel) 228 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 229 OldCSI(OldCSI), 230 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 231 232 // Retrieve the value of the context parameter. 233 llvm::Value *getContextValue() const override { 234 if (OuterRegionInfo) 235 return OuterRegionInfo->getContextValue(); 236 llvm_unreachable("No context value for inlined OpenMP region"); 237 } 238 239 void setContextValue(llvm::Value *V) override { 240 if (OuterRegionInfo) { 241 OuterRegionInfo->setContextValue(V); 242 return; 243 } 244 llvm_unreachable("No context value for inlined OpenMP region"); 245 } 246 247 /// Lookup the captured field decl for a variable. 
248 const FieldDecl *lookup(const VarDecl *VD) const override { 249 if (OuterRegionInfo) 250 return OuterRegionInfo->lookup(VD); 251 // If there is no outer outlined region,no need to lookup in a list of 252 // captured variables, we can use the original one. 253 return nullptr; 254 } 255 256 FieldDecl *getThisFieldDecl() const override { 257 if (OuterRegionInfo) 258 return OuterRegionInfo->getThisFieldDecl(); 259 return nullptr; 260 } 261 262 /// Get a variable or parameter for storing global thread id 263 /// inside OpenMP construct. 264 const VarDecl *getThreadIDVariable() const override { 265 if (OuterRegionInfo) 266 return OuterRegionInfo->getThreadIDVariable(); 267 return nullptr; 268 } 269 270 /// Get an LValue for the current ThreadID variable. 271 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 272 if (OuterRegionInfo) 273 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 274 llvm_unreachable("No LValue for inlined OpenMP construct"); 275 } 276 277 /// Get the name of the capture helper. 278 StringRef getHelperName() const override { 279 if (auto *OuterRegionInfo = getOldCSI()) 280 return OuterRegionInfo->getHelperName(); 281 llvm_unreachable("No helper name for inlined OpenMP construct"); 282 } 283 284 void emitUntiedSwitch(CodeGenFunction &CGF) override { 285 if (OuterRegionInfo) 286 OuterRegionInfo->emitUntiedSwitch(CGF); 287 } 288 289 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 290 291 static bool classof(const CGCapturedStmtInfo *Info) { 292 return CGOpenMPRegionInfo::classof(Info) && 293 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 294 } 295 296 ~CGOpenMPInlinedRegionInfo() override = default; 297 298 private: 299 /// CodeGen info about outer OpenMP region. 300 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 301 CGOpenMPRegionInfo *OuterRegionInfo; 302 }; 303 304 /// API for captured statement code generation in OpenMP target 305 /// constructs. 
For this captures, implicit parameters are used instead of the 306 /// captured fields. The name of the target region has to be unique in a given 307 /// application so it is provided by the client, because only the client has 308 /// the information to generate that. 309 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 310 public: 311 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 312 const RegionCodeGenTy &CodeGen, StringRef HelperName) 313 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 314 /*HasCancel=*/false), 315 HelperName(HelperName) {} 316 317 /// This is unused for target regions because each starts executing 318 /// with a single thread. 319 const VarDecl *getThreadIDVariable() const override { return nullptr; } 320 321 /// Get the name of the capture helper. 322 StringRef getHelperName() const override { return HelperName; } 323 324 static bool classof(const CGCapturedStmtInfo *Info) { 325 return CGOpenMPRegionInfo::classof(Info) && 326 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 327 } 328 329 private: 330 StringRef HelperName; 331 }; 332 333 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 334 llvm_unreachable("No codegen for expressions"); 335 } 336 /// API for generation of expressions captured in a innermost OpenMP 337 /// region. 338 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 339 public: 340 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 341 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 342 OMPD_unknown, 343 /*HasCancel=*/false), 344 PrivScope(CGF) { 345 // Make sure the globals captured in the provided statement are local by 346 // using the privatization logic. We assume the same variable is not 347 // captured more than once. 
348 for (const auto &C : CS.captures()) { 349 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 350 continue; 351 352 const VarDecl *VD = C.getCapturedVar(); 353 if (VD->isLocalVarDeclOrParm()) 354 continue; 355 356 DeclRefExpr DRE(const_cast<VarDecl *>(VD), 357 /*RefersToEnclosingVariableOrCapture=*/false, 358 VD->getType().getNonReferenceType(), VK_LValue, 359 C.getLocation()); 360 PrivScope.addPrivate( 361 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); }); 362 } 363 (void)PrivScope.Privatize(); 364 } 365 366 /// Lookup the captured field decl for a variable. 367 const FieldDecl *lookup(const VarDecl *VD) const override { 368 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 369 return FD; 370 return nullptr; 371 } 372 373 /// Emit the captured statement body. 374 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 375 llvm_unreachable("No body for expressions"); 376 } 377 378 /// Get a variable or parameter for storing global thread id 379 /// inside OpenMP construct. 380 const VarDecl *getThreadIDVariable() const override { 381 llvm_unreachable("No thread id for expressions"); 382 } 383 384 /// Get the name of the capture helper. 385 StringRef getHelperName() const override { 386 llvm_unreachable("No helper name for expressions"); 387 } 388 389 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 390 391 private: 392 /// Private scope to capture global variables. 393 CodeGenFunction::OMPPrivateScope PrivScope; 394 }; 395 396 /// RAII for emitting code of OpenMP constructs. 397 class InlinedOpenMPRegionRAII { 398 CodeGenFunction &CGF; 399 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 400 FieldDecl *LambdaThisCaptureField = nullptr; 401 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 402 403 public: 404 /// Constructs region for combined constructs. 405 /// \param CodeGen Code generation sequence for combined directives. 
Includes 406 /// a list of functions used for code generation of implicitly inlined 407 /// regions. 408 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 409 OpenMPDirectiveKind Kind, bool HasCancel) 410 : CGF(CGF) { 411 // Start emission for the construct. 412 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 413 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 414 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 415 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 416 CGF.LambdaThisCaptureField = nullptr; 417 BlockInfo = CGF.BlockInfo; 418 CGF.BlockInfo = nullptr; 419 } 420 421 ~InlinedOpenMPRegionRAII() { 422 // Restore original CapturedStmtInfo only if we're done with code emission. 423 auto *OldCSI = 424 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 425 delete CGF.CapturedStmtInfo; 426 CGF.CapturedStmtInfo = OldCSI; 427 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 428 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 429 CGF.BlockInfo = BlockInfo; 430 } 431 }; 432 433 /// Values for bit flags used in the ident_t to describe the fields. 434 /// All enumeric elements are named and described in accordance with the code 435 /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h 436 enum OpenMPLocationFlags : unsigned { 437 /// Use trampoline for internal microtask. 438 OMP_IDENT_IMD = 0x01, 439 /// Use c-style ident structure. 440 OMP_IDENT_KMPC = 0x02, 441 /// Atomic reduction option for kmpc_reduce. 442 OMP_ATOMIC_REDUCE = 0x10, 443 /// Explicit 'barrier' directive. 444 OMP_IDENT_BARRIER_EXPL = 0x20, 445 /// Implicit barrier in code. 446 OMP_IDENT_BARRIER_IMPL = 0x40, 447 /// Implicit barrier in 'for' directive. 448 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 449 /// Implicit barrier in 'sections' directive. 450 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 451 /// Implicit barrier in 'single' directive. 
452 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 453 /// Call of __kmp_for_static_init for static loop. 454 OMP_IDENT_WORK_LOOP = 0x200, 455 /// Call of __kmp_for_static_init for sections. 456 OMP_IDENT_WORK_SECTIONS = 0x400, 457 /// Call of __kmp_for_static_init for distribute. 458 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 459 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 460 }; 461 462 /// Describes ident structure that describes a source location. 463 /// All descriptions are taken from 464 /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h 465 /// Original structure: 466 /// typedef struct ident { 467 /// kmp_int32 reserved_1; /**< might be used in Fortran; 468 /// see above */ 469 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 470 /// KMP_IDENT_KMPC identifies this union 471 /// member */ 472 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 473 /// see above */ 474 ///#if USE_ITT_BUILD 475 /// /* but currently used for storing 476 /// region-specific ITT */ 477 /// /* contextual information. */ 478 ///#endif /* USE_ITT_BUILD */ 479 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 480 /// C++ */ 481 /// char const *psource; /**< String describing the source location. 482 /// The string is composed of semi-colon separated 483 // fields which describe the source file, 484 /// the function and a pair of line numbers that 485 /// delimit the construct. 486 /// */ 487 /// } ident_t; 488 enum IdentFieldIndex { 489 /// might be used in Fortran 490 IdentField_Reserved_1, 491 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 492 IdentField_Flags, 493 /// Not really used in Fortran any more 494 IdentField_Reserved_2, 495 /// Source[4] in Fortran, do not use for C++ 496 IdentField_Reserved_3, 497 /// String describing the source location. 
The string is composed of 498 /// semi-colon separated fields which describe the source file, the function 499 /// and a pair of line numbers that delimit the construct. 500 IdentField_PSource 501 }; 502 503 /// Schedule types for 'omp for' loops (these enumerators are taken from 504 /// the enum sched_type in kmp.h). 505 enum OpenMPSchedType { 506 /// Lower bound for default (unordered) versions. 507 OMP_sch_lower = 32, 508 OMP_sch_static_chunked = 33, 509 OMP_sch_static = 34, 510 OMP_sch_dynamic_chunked = 35, 511 OMP_sch_guided_chunked = 36, 512 OMP_sch_runtime = 37, 513 OMP_sch_auto = 38, 514 /// static with chunk adjustment (e.g., simd) 515 OMP_sch_static_balanced_chunked = 45, 516 /// Lower bound for 'ordered' versions. 517 OMP_ord_lower = 64, 518 OMP_ord_static_chunked = 65, 519 OMP_ord_static = 66, 520 OMP_ord_dynamic_chunked = 67, 521 OMP_ord_guided_chunked = 68, 522 OMP_ord_runtime = 69, 523 OMP_ord_auto = 70, 524 OMP_sch_default = OMP_sch_static, 525 /// dist_schedule types 526 OMP_dist_sch_static_chunked = 91, 527 OMP_dist_sch_static = 92, 528 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 529 /// Set if the monotonic schedule modifier was present. 530 OMP_sch_modifier_monotonic = (1 << 29), 531 /// Set if the nonmonotonic schedule modifier was present. 
532 OMP_sch_modifier_nonmonotonic = (1 << 30), 533 }; 534 535 enum OpenMPRTLFunction { 536 /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, 537 /// kmpc_micro microtask, ...); 538 OMPRTL__kmpc_fork_call, 539 /// Call to void *__kmpc_threadprivate_cached(ident_t *loc, 540 /// kmp_int32 global_tid, void *data, size_t size, void ***cache); 541 OMPRTL__kmpc_threadprivate_cached, 542 /// Call to void __kmpc_threadprivate_register( ident_t *, 543 /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 544 OMPRTL__kmpc_threadprivate_register, 545 // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); 546 OMPRTL__kmpc_global_thread_num, 547 // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 548 // kmp_critical_name *crit); 549 OMPRTL__kmpc_critical, 550 // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 551 // global_tid, kmp_critical_name *crit, uintptr_t hint); 552 OMPRTL__kmpc_critical_with_hint, 553 // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 554 // kmp_critical_name *crit); 555 OMPRTL__kmpc_end_critical, 556 // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 557 // global_tid); 558 OMPRTL__kmpc_cancel_barrier, 559 // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 560 OMPRTL__kmpc_barrier, 561 // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 562 OMPRTL__kmpc_for_static_fini, 563 // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 564 // global_tid); 565 OMPRTL__kmpc_serialized_parallel, 566 // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 567 // global_tid); 568 OMPRTL__kmpc_end_serialized_parallel, 569 // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 570 // kmp_int32 num_threads); 571 OMPRTL__kmpc_push_num_threads, 572 // Call to void __kmpc_flush(ident_t *loc); 573 OMPRTL__kmpc_flush, 574 // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 
global_tid); 575 OMPRTL__kmpc_master, 576 // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); 577 OMPRTL__kmpc_end_master, 578 // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 579 // int end_part); 580 OMPRTL__kmpc_omp_taskyield, 581 // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); 582 OMPRTL__kmpc_single, 583 // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); 584 OMPRTL__kmpc_end_single, 585 // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 586 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 587 // kmp_routine_entry_t *task_entry); 588 OMPRTL__kmpc_omp_task_alloc, 589 // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * 590 // new_task); 591 OMPRTL__kmpc_omp_task, 592 // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 593 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 594 // kmp_int32 didit); 595 OMPRTL__kmpc_copyprivate, 596 // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 597 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 598 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 599 OMPRTL__kmpc_reduce, 600 // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 601 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 602 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 603 // *lck); 604 OMPRTL__kmpc_reduce_nowait, 605 // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 606 // kmp_critical_name *lck); 607 OMPRTL__kmpc_end_reduce, 608 // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 609 // kmp_critical_name *lck); 610 OMPRTL__kmpc_end_reduce_nowait, 611 // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 612 // kmp_task_t * new_task); 613 OMPRTL__kmpc_omp_task_begin_if0, 614 // Call to void 
__kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 615 // kmp_task_t * new_task); 616 OMPRTL__kmpc_omp_task_complete_if0, 617 // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 618 OMPRTL__kmpc_ordered, 619 // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 620 OMPRTL__kmpc_end_ordered, 621 // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 622 // global_tid); 623 OMPRTL__kmpc_omp_taskwait, 624 // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 625 OMPRTL__kmpc_taskgroup, 626 // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 627 OMPRTL__kmpc_end_taskgroup, 628 // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 629 // int proc_bind); 630 OMPRTL__kmpc_push_proc_bind, 631 // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 632 // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t 633 // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 634 OMPRTL__kmpc_omp_task_with_deps, 635 // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 636 // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 637 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 638 OMPRTL__kmpc_omp_wait_deps, 639 // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 640 // global_tid, kmp_int32 cncl_kind); 641 OMPRTL__kmpc_cancellationpoint, 642 // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 643 // kmp_int32 cncl_kind); 644 OMPRTL__kmpc_cancel, 645 // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 646 // kmp_int32 num_teams, kmp_int32 thread_limit); 647 OMPRTL__kmpc_push_num_teams, 648 // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 649 // microtask, ...); 650 OMPRTL__kmpc_fork_teams, 651 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 652 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, 
kmp_int64 st, int nogroup, int 653 // sched, kmp_uint64 grainsize, void *task_dup); 654 OMPRTL__kmpc_taskloop, 655 // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 656 // num_dims, struct kmp_dim *dims); 657 OMPRTL__kmpc_doacross_init, 658 // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 659 OMPRTL__kmpc_doacross_fini, 660 // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 661 // *vec); 662 OMPRTL__kmpc_doacross_post, 663 // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 664 // *vec); 665 OMPRTL__kmpc_doacross_wait, 666 // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void 667 // *data); 668 OMPRTL__kmpc_task_reduction_init, 669 // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 670 // *d); 671 OMPRTL__kmpc_task_reduction_get_th_data, 672 673 // 674 // Offloading related calls 675 // 676 // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 677 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 678 // *arg_types); 679 OMPRTL__tgt_target, 680 // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 681 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 682 // *arg_types); 683 OMPRTL__tgt_target_nowait, 684 // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 685 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 686 // *arg_types, int32_t num_teams, int32_t thread_limit); 687 OMPRTL__tgt_target_teams, 688 // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void 689 // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t 690 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 691 OMPRTL__tgt_target_teams_nowait, 692 // Call to void __tgt_register_lib(__tgt_bin_desc *desc); 693 OMPRTL__tgt_register_lib, 694 // Call to void 
__tgt_unregister_lib(__tgt_bin_desc *desc); 695 OMPRTL__tgt_unregister_lib, 696 // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 697 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 698 OMPRTL__tgt_target_data_begin, 699 // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 700 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 701 // *arg_types); 702 OMPRTL__tgt_target_data_begin_nowait, 703 // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 704 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 705 OMPRTL__tgt_target_data_end, 706 // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t 707 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 708 // *arg_types); 709 OMPRTL__tgt_target_data_end_nowait, 710 // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 711 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 712 OMPRTL__tgt_target_data_update, 713 // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t 714 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 715 // *arg_types); 716 OMPRTL__tgt_target_data_update_nowait, 717 }; 718 719 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 720 /// region. 
721 class CleanupTy final : public EHScopeStack::Cleanup { 722 PrePostActionTy *Action; 723 724 public: 725 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 726 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 727 if (!CGF.HaveInsertPoint()) 728 return; 729 Action->Exit(CGF); 730 } 731 }; 732 733 } // anonymous namespace 734 735 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 736 CodeGenFunction::RunCleanupsScope Scope(CGF); 737 if (PrePostAction) { 738 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 739 Callback(CodeGen, CGF, *PrePostAction); 740 } else { 741 PrePostActionTy Action; 742 Callback(CodeGen, CGF, Action); 743 } 744 } 745 746 /// Check if the combiner is a call to UDR combiner and if it is so return the 747 /// UDR decl used for reduction. 748 static const OMPDeclareReductionDecl * 749 getReductionInit(const Expr *ReductionOp) { 750 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 751 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 752 if (const auto *DRE = 753 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 754 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 755 return DRD; 756 return nullptr; 757 } 758 759 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 760 const OMPDeclareReductionDecl *DRD, 761 const Expr *InitOp, 762 Address Private, Address Original, 763 QualType Ty) { 764 if (DRD->getInitializer()) { 765 std::pair<llvm::Function *, llvm::Function *> Reduction = 766 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 767 const auto *CE = cast<CallExpr>(InitOp); 768 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 769 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 770 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 771 const auto *LHSDRE = 772 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 773 const auto *RHSDRE = 774 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 775 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 776 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 777 [=]() { return Private; }); 778 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 779 [=]() { return Original; }); 780 (void)PrivateScope.Privatize(); 781 RValue Func = RValue::get(Reduction.second); 782 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 783 CGF.EmitIgnoredExpr(InitOp); 784 } else { 785 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 786 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 787 auto *GV = new llvm::GlobalVariable( 788 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 789 llvm::GlobalValue::PrivateLinkage, Init, Name); 790 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 791 RValue InitRVal; 792 switch (CGF.getEvaluationKind(Ty)) { 793 case TEK_Scalar: 794 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 795 break; 796 case TEK_Complex: 797 InitRVal = 798 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 799 break; 800 case TEK_Aggregate: 801 InitRVal = RValue::getAggregate(LV.getAddress()); 802 break; 803 } 804 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 805 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 806 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 807 /*IsInitializer=*/false); 808 } 809 } 810 811 /// Emit initialization of arrays of complex types. 812 /// \param DestAddr Address of the array. 813 /// \param Type Type of array. 814 /// \param Init Initial expression of array. 815 /// \param SrcAddr Address of the original array. 816 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 817 QualType Type, bool EmitDeclareReductionInit, 818 const Expr *Init, 819 const OMPDeclareReductionDecl *DRD, 820 Address SrcAddr = Address::invalid()) { 821 // Perform element-by-element initialization. 
822 QualType ElementTy; 823 824 // Drill down to the base element type on both arrays. 825 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 826 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 827 DestAddr = 828 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 829 if (DRD) 830 SrcAddr = 831 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 832 833 llvm::Value *SrcBegin = nullptr; 834 if (DRD) 835 SrcBegin = SrcAddr.getPointer(); 836 llvm::Value *DestBegin = DestAddr.getPointer(); 837 // Cast from pointer to array type to pointer to single element. 838 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 839 // The basic structure here is a while-do loop. 840 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 841 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 842 llvm::Value *IsEmpty = 843 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 844 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 845 846 // Enter the loop body, making that address the current address. 
847 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 848 CGF.EmitBlock(BodyBB); 849 850 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 851 852 llvm::PHINode *SrcElementPHI = nullptr; 853 Address SrcElementCurrent = Address::invalid(); 854 if (DRD) { 855 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 856 "omp.arraycpy.srcElementPast"); 857 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 858 SrcElementCurrent = 859 Address(SrcElementPHI, 860 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 861 } 862 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 863 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 864 DestElementPHI->addIncoming(DestBegin, EntryBB); 865 Address DestElementCurrent = 866 Address(DestElementPHI, 867 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 868 869 // Emit copy. 870 { 871 CodeGenFunction::RunCleanupsScope InitScope(CGF); 872 if (EmitDeclareReductionInit) { 873 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 874 SrcElementCurrent, ElementTy); 875 } else 876 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 877 /*IsInitializer=*/false); 878 } 879 880 if (DRD) { 881 // Shift the address forward by one element. 882 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 883 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 884 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 885 } 886 887 // Shift the address forward by one element. 888 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 889 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 890 // Check whether we've reached the end. 891 llvm::Value *Done = 892 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 893 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 894 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 895 896 // Done. 
897 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 898 } 899 900 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 901 return CGF.EmitOMPSharedLValue(E); 902 } 903 904 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 905 const Expr *E) { 906 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 907 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 908 return LValue(); 909 } 910 911 void ReductionCodeGen::emitAggregateInitialization( 912 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 913 const OMPDeclareReductionDecl *DRD) { 914 // Emit VarDecl with copy init for arrays. 915 // Get the address of the original variable captured in current 916 // captured region. 917 const auto *PrivateVD = 918 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 919 bool EmitDeclareReductionInit = 920 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 921 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 922 EmitDeclareReductionInit, 923 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 924 : PrivateVD->getInit(), 925 DRD, SharedLVal.getAddress()); 926 } 927 928 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 929 ArrayRef<const Expr *> Privates, 930 ArrayRef<const Expr *> ReductionOps) { 931 ClausesData.reserve(Shareds.size()); 932 SharedAddresses.reserve(Shareds.size()); 933 Sizes.reserve(Shareds.size()); 934 BaseDecls.reserve(Shareds.size()); 935 auto IPriv = Privates.begin(); 936 auto IRed = ReductionOps.begin(); 937 for (const Expr *Ref : Shareds) { 938 ClausesData.emplace_back(Ref, *IPriv, *IRed); 939 std::advance(IPriv, 1); 940 std::advance(IRed, 1); 941 } 942 } 943 944 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { 945 assert(SharedAddresses.size() == N && 946 "Number of generated lvalues must be exactly N."); 947 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 948 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 949 SharedAddresses.emplace_back(First, Second); 950 } 951 952 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 953 const auto *PrivateVD = 954 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 955 QualType PrivateType = PrivateVD->getType(); 956 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 957 if (!PrivateType->isVariablyModifiedType()) { 958 Sizes.emplace_back( 959 CGF.getTypeSize( 960 SharedAddresses[N].first.getType().getNonReferenceType()), 961 nullptr); 962 return; 963 } 964 llvm::Value *Size; 965 llvm::Value *SizeInChars; 966 auto *ElemType = 967 cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType()) 968 ->getElementType(); 969 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 970 if (AsArraySection) { 971 Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(), 972 SharedAddresses[N].first.getPointer()); 973 Size = CGF.Builder.CreateNUWAdd( 974 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 975 SizeInChars 
= CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 976 } else { 977 SizeInChars = CGF.getTypeSize( 978 SharedAddresses[N].first.getType().getNonReferenceType()); 979 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 980 } 981 Sizes.emplace_back(SizeInChars, Size); 982 CodeGenFunction::OpaqueValueMapping OpaqueMap( 983 CGF, 984 cast<OpaqueValueExpr>( 985 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 986 RValue::get(Size)); 987 CGF.EmitVariablyModifiedType(PrivateType); 988 } 989 990 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 991 llvm::Value *Size) { 992 const auto *PrivateVD = 993 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 994 QualType PrivateType = PrivateVD->getType(); 995 if (!PrivateType->isVariablyModifiedType()) { 996 assert(!Size && !Sizes[N].second && 997 "Size should be nullptr for non-variably modified reduction " 998 "items."); 999 return; 1000 } 1001 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1002 CGF, 1003 cast<OpaqueValueExpr>( 1004 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1005 RValue::get(Size)); 1006 CGF.EmitVariablyModifiedType(PrivateType); 1007 } 1008 1009 void ReductionCodeGen::emitInitialization( 1010 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 1011 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 1012 assert(SharedAddresses.size() > N && "No variable was generated"); 1013 const auto *PrivateVD = 1014 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1015 const OMPDeclareReductionDecl *DRD = 1016 getReductionInit(ClausesData[N].ReductionOp); 1017 QualType PrivateType = PrivateVD->getType(); 1018 PrivateAddr = CGF.Builder.CreateElementBitCast( 1019 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1020 QualType SharedType = SharedAddresses[N].first.getType(); 1021 SharedLVal = CGF.MakeAddrLValue( 1022 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(), 1023 
CGF.ConvertTypeForMem(SharedType)), 1024 SharedType, SharedAddresses[N].first.getBaseInfo(), 1025 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 1026 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 1027 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 1028 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 1029 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 1030 PrivateAddr, SharedLVal.getAddress(), 1031 SharedLVal.getType()); 1032 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 1033 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 1034 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 1035 PrivateVD->getType().getQualifiers(), 1036 /*IsInitializer=*/false); 1037 } 1038 } 1039 1040 bool ReductionCodeGen::needCleanups(unsigned N) { 1041 const auto *PrivateVD = 1042 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1043 QualType PrivateType = PrivateVD->getType(); 1044 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1045 return DTorKind != QualType::DK_none; 1046 } 1047 1048 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 1049 Address PrivateAddr) { 1050 const auto *PrivateVD = 1051 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1052 QualType PrivateType = PrivateVD->getType(); 1053 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1054 if (needCleanups(N)) { 1055 PrivateAddr = CGF.Builder.CreateElementBitCast( 1056 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1057 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 1058 } 1059 } 1060 1061 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1062 LValue BaseLV) { 1063 BaseTy = BaseTy.getNonReferenceType(); 1064 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1065 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1066 if (const auto *PtrTy = 
BaseTy->getAs<PointerType>()) { 1067 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); 1068 } else { 1069 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy); 1070 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 1071 } 1072 BaseTy = BaseTy->getPointeeType(); 1073 } 1074 return CGF.MakeAddrLValue( 1075 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(), 1076 CGF.ConvertTypeForMem(ElTy)), 1077 BaseLV.getType(), BaseLV.getBaseInfo(), 1078 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 1079 } 1080 1081 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1082 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 1083 llvm::Value *Addr) { 1084 Address Tmp = Address::invalid(); 1085 Address TopTmp = Address::invalid(); 1086 Address MostTopTmp = Address::invalid(); 1087 BaseTy = BaseTy.getNonReferenceType(); 1088 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1089 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1090 Tmp = CGF.CreateMemTemp(BaseTy); 1091 if (TopTmp.isValid()) 1092 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 1093 else 1094 MostTopTmp = Tmp; 1095 TopTmp = Tmp; 1096 BaseTy = BaseTy->getPointeeType(); 1097 } 1098 llvm::Type *Ty = BaseLVType; 1099 if (Tmp.isValid()) 1100 Ty = Tmp.getElementType(); 1101 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 1102 if (Tmp.isValid()) { 1103 CGF.Builder.CreateStore(Addr, Tmp); 1104 return MostTopTmp; 1105 } 1106 return Address(Addr, BaseLVAlignment); 1107 } 1108 1109 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 1110 const VarDecl *OrigVD = nullptr; 1111 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 1112 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 1113 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 1114 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 1115 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1116 Base = 
TempASE->getBase()->IgnoreParenImpCasts(); 1117 DE = cast<DeclRefExpr>(Base); 1118 OrigVD = cast<VarDecl>(DE->getDecl()); 1119 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 1120 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 1121 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1122 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1123 DE = cast<DeclRefExpr>(Base); 1124 OrigVD = cast<VarDecl>(DE->getDecl()); 1125 } 1126 return OrigVD; 1127 } 1128 1129 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1130 Address PrivateAddr) { 1131 const DeclRefExpr *DE; 1132 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1133 BaseDecls.emplace_back(OrigVD); 1134 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1135 LValue BaseLValue = 1136 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1137 OriginalBaseLValue); 1138 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1139 BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); 1140 llvm::Value *PrivatePointer = 1141 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1142 PrivateAddr.getPointer(), 1143 SharedAddresses[N].first.getAddress().getType()); 1144 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1145 return castToBase(CGF, OrigVD->getType(), 1146 SharedAddresses[N].first.getType(), 1147 OriginalBaseLValue.getAddress().getType(), 1148 OriginalBaseLValue.getAlignment(), Ptr); 1149 } 1150 BaseDecls.emplace_back( 1151 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1152 return PrivateAddr; 1153 } 1154 1155 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1156 const OMPDeclareReductionDecl *DRD = 1157 getReductionInit(ClausesData[N].ReductionOp); 1158 return DRD && DRD->getInitializer(); 1159 } 1160 1161 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1162 return CGF.EmitLoadOfPointerLValue( 1163 
CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1164 getThreadIDVariable()->getType()->castAs<PointerType>()); 1165 } 1166 1167 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1168 if (!CGF.HaveInsertPoint()) 1169 return; 1170 // 1.2.2 OpenMP Language Terminology 1171 // Structured block - An executable statement with a single entry at the 1172 // top and a single exit at the bottom. 1173 // The point of exit cannot be a branch out of the structured block. 1174 // longjmp() and throw() must not violate the entry/exit criteria. 1175 CGF.EHStack.pushTerminate(); 1176 CodeGen(CGF); 1177 CGF.EHStack.popTerminate(); 1178 } 1179 1180 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1181 CodeGenFunction &CGF) { 1182 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1183 getThreadIDVariable()->getType(), 1184 AlignmentSource::Decl); 1185 } 1186 1187 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1188 QualType FieldTy) { 1189 auto *Field = FieldDecl::Create( 1190 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1191 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1192 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1193 Field->setAccess(AS_public); 1194 DC->addDecl(Field); 1195 return Field; 1196 } 1197 1198 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1199 StringRef Separator) 1200 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1201 OffloadEntriesInfoManager(CGM) { 1202 ASTContext &C = CGM.getContext(); 1203 RecordDecl *RD = C.buildImplicitRecord("ident_t"); 1204 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1205 RD->startDefinition(); 1206 // reserved_1 1207 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1208 // flags 1209 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1210 // reserved_2 1211 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1212 // reserved_3 1213 
addFieldToRecordDecl(C, RD, KmpInt32Ty); 1214 // psource 1215 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1216 RD->completeDefinition(); 1217 IdentQTy = C.getRecordType(RD); 1218 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); 1219 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1220 1221 loadOffloadInfoMetadata(); 1222 } 1223 1224 void CGOpenMPRuntime::clear() { 1225 InternalVars.clear(); 1226 } 1227 1228 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1229 SmallString<128> Buffer; 1230 llvm::raw_svector_ostream OS(Buffer); 1231 StringRef Sep = FirstSeparator; 1232 for (StringRef Part : Parts) { 1233 OS << Sep << Part; 1234 Sep = Separator; 1235 } 1236 return OS.str(); 1237 } 1238 1239 static llvm::Function * 1240 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1241 const Expr *CombinerInitializer, const VarDecl *In, 1242 const VarDecl *Out, bool IsCombiner) { 1243 // void .omp_combiner.(Ty *in, Ty *out); 1244 ASTContext &C = CGM.getContext(); 1245 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1246 FunctionArgList Args; 1247 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1248 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1249 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1250 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1251 Args.push_back(&OmpOutParm); 1252 Args.push_back(&OmpInParm); 1253 const CGFunctionInfo &FnInfo = 1254 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1255 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1256 std::string Name = CGM.getOpenMPRuntime().getName( 1257 {IsCombiner ? 
"omp_combiner" : "omp_initializer", ""}); 1258 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1259 Name, &CGM.getModule()); 1260 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1261 Fn->removeFnAttr(llvm::Attribute::NoInline); 1262 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1263 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1264 CodeGenFunction CGF(CGM); 1265 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1266 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1267 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1268 Out->getLocation()); 1269 CodeGenFunction::OMPPrivateScope Scope(CGF); 1270 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1271 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1272 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1273 .getAddress(); 1274 }); 1275 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1276 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1277 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1278 .getAddress(); 1279 }); 1280 (void)Scope.Privatize(); 1281 if (!IsCombiner && Out->hasInit() && 1282 !CGF.isTrivialInitializer(Out->getInit())) { 1283 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1284 Out->getType().getQualifiers(), 1285 /*IsInitializer=*/true); 1286 } 1287 if (CombinerInitializer) 1288 CGF.EmitIgnoredExpr(CombinerInitializer); 1289 Scope.ForceCleanup(); 1290 CGF.FinishFunction(); 1291 return Fn; 1292 } 1293 1294 void CGOpenMPRuntime::emitUserDefinedReduction( 1295 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1296 if (UDRMap.count(D) > 0) 1297 return; 1298 ASTContext &C = CGM.getContext(); 1299 if (!In || !Out) { 1300 In = &C.Idents.get("omp_in"); 1301 Out = &C.Idents.get("omp_out"); 1302 } 1303 llvm::Function *Combiner = emitCombinerOrInitializer( 1304 CGM, D->getType(), D->getCombiner(), 
cast<VarDecl>(D->lookup(In).front()), 1305 cast<VarDecl>(D->lookup(Out).front()), 1306 /*IsCombiner=*/true); 1307 llvm::Function *Initializer = nullptr; 1308 if (const Expr *Init = D->getInitializer()) { 1309 if (!Priv || !Orig) { 1310 Priv = &C.Idents.get("omp_priv"); 1311 Orig = &C.Idents.get("omp_orig"); 1312 } 1313 Initializer = emitCombinerOrInitializer( 1314 CGM, D->getType(), 1315 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1316 : nullptr, 1317 cast<VarDecl>(D->lookup(Orig).front()), 1318 cast<VarDecl>(D->lookup(Priv).front()), 1319 /*IsCombiner=*/false); 1320 } 1321 UDRMap.try_emplace(D, Combiner, Initializer); 1322 if (CGF) { 1323 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1324 Decls.second.push_back(D); 1325 } 1326 } 1327 1328 std::pair<llvm::Function *, llvm::Function *> 1329 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1330 auto I = UDRMap.find(D); 1331 if (I != UDRMap.end()) 1332 return I->second; 1333 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1334 return UDRMap.lookup(D); 1335 } 1336 1337 static llvm::Value *emitParallelOrTeamsOutlinedFunction( 1338 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1339 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1340 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1341 assert(ThreadIDVar->getType()->isPointerType() && 1342 "thread id variable must be of type kmp_int32 *"); 1343 CodeGenFunction CGF(CGM, true); 1344 bool HasCancel = false; 1345 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1346 HasCancel = OPD->hasCancel(); 1347 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1348 HasCancel = OPSD->hasCancel(); 1349 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1350 HasCancel = OPFD->hasCancel(); 1351 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1352 HasCancel = OPFD->hasCancel(); 1353 else if 
(const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1354 HasCancel = OPFD->hasCancel(); 1355 else if (const auto *OPFD = 1356 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1357 HasCancel = OPFD->hasCancel(); 1358 else if (const auto *OPFD = 1359 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1360 HasCancel = OPFD->hasCancel(); 1361 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1362 HasCancel, OutlinedHelperName); 1363 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1364 return CGF.GenerateOpenMPCapturedStmtFunction(*CS); 1365 } 1366 1367 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( 1368 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1369 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1370 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1371 return emitParallelOrTeamsOutlinedFunction( 1372 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1373 } 1374 1375 llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1376 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1377 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1378 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1379 return emitParallelOrTeamsOutlinedFunction( 1380 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1381 } 1382 1383 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( 1384 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1385 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1386 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1387 bool Tied, unsigned &NumberOfParts) { 1388 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1389 PrePostActionTy &) { 1390 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1391 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1392 llvm::Value *TaskArgs[] = { 
1393 UpLoc, ThreadID, 1394 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1395 TaskTVar->getType()->castAs<PointerType>()) 1396 .getPointer()}; 1397 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1398 }; 1399 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1400 UntiedCodeGen); 1401 CodeGen.setAction(Action); 1402 assert(!ThreadIDVar->getType()->isPointerType() && 1403 "thread id variable must be of type kmp_int32 for tasks"); 1404 const OpenMPDirectiveKind Region = 1405 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1406 : OMPD_task; 1407 const CapturedStmt *CS = D.getCapturedStmt(Region); 1408 const auto *TD = dyn_cast<OMPTaskDirective>(&D); 1409 CodeGenFunction CGF(CGM, true); 1410 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1411 InnermostKind, 1412 TD ? TD->hasCancel() : false, Action); 1413 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1414 llvm::Value *Res = CGF.GenerateCapturedStmtFunction(*CS); 1415 if (!Tied) 1416 NumberOfParts = Action.getNumberOfParts(); 1417 return Res; 1418 } 1419 1420 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1421 const RecordDecl *RD, const CGRecordLayout &RL, 1422 ArrayRef<llvm::Constant *> Data) { 1423 llvm::StructType *StructTy = RL.getLLVMType(); 1424 unsigned PrevIdx = 0; 1425 ConstantInitBuilder CIBuilder(CGM); 1426 auto DI = Data.begin(); 1427 for (const FieldDecl *FD : RD->fields()) { 1428 unsigned Idx = RL.getLLVMFieldNo(FD); 1429 // Fill the alignment. 1430 for (unsigned I = PrevIdx; I < Idx; ++I) 1431 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1432 PrevIdx = Idx + 1; 1433 Fields.add(*DI); 1434 ++DI; 1435 } 1436 } 1437 1438 template <class... As> 1439 static llvm::GlobalVariable * 1440 createConstantGlobalStruct(CodeGenModule &CGM, QualType Ty, 1441 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1442 As &&... 
Args) { 1443 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1444 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1445 ConstantInitBuilder CIBuilder(CGM); 1446 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1447 buildStructValue(Fields, CGM, RD, RL, Data); 1448 return Fields.finishAndCreateGlobal( 1449 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), 1450 /*isConstant=*/true, std::forward<As>(Args)...); 1451 } 1452 1453 template <typename T> 1454 static void 1455 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1456 ArrayRef<llvm::Constant *> Data, 1457 T &Parent) { 1458 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1459 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1460 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1461 buildStructValue(Fields, CGM, RD, RL, Data); 1462 Fields.finishAndAddTo(Parent); 1463 } 1464 1465 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1466 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1467 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); 1468 if (!Entry) { 1469 if (!DefaultOpenMPPSource) { 1470 // Initialize default location for psource field of ident_t structure of 1471 // all ident_t objects. Format is ";file;function;line;column;;". 
1472 // Taken from 1473 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c 1474 DefaultOpenMPPSource = 1475 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 1476 DefaultOpenMPPSource = 1477 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 1478 } 1479 1480 llvm::Constant *Data[] = {llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1481 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 1482 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1483 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1484 DefaultOpenMPPSource}; 1485 llvm::GlobalValue *DefaultOpenMPLocation = createConstantGlobalStruct( 1486 CGM, IdentQTy, Data, "", llvm::GlobalValue::PrivateLinkage); 1487 DefaultOpenMPLocation->setUnnamedAddr( 1488 llvm::GlobalValue::UnnamedAddr::Global); 1489 1490 OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation; 1491 } 1492 return Address(Entry, Align); 1493 } 1494 1495 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1496 SourceLocation Loc, 1497 unsigned Flags) { 1498 Flags |= OMP_IDENT_KMPC; 1499 // If no debug info is generated - return global default location. 1500 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1501 Loc.isInvalid()) 1502 return getOrCreateDefaultLocation(Flags).getPointer(); 1503 1504 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1505 1506 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1507 Address LocValue = Address::invalid(); 1508 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1509 if (I != OpenMPLocThreadIDMap.end()) 1510 LocValue = Address(I->second.DebugLoc, Align); 1511 1512 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 1513 // GetOpenMPThreadID was called before this routine. 
1514 if (!LocValue.isValid()) { 1515 // Generate "ident_t .kmpc_loc.addr;" 1516 Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr"); 1517 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1518 Elem.second.DebugLoc = AI.getPointer(); 1519 LocValue = AI; 1520 1521 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1522 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 1523 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 1524 CGF.getTypeSize(IdentQTy)); 1525 } 1526 1527 // char **psource = &.kmpc_loc_<flags>.addr.psource; 1528 LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy); 1529 auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin(); 1530 LValue PSource = 1531 CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource)); 1532 1533 llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 1534 if (OMPDebugLoc == nullptr) { 1535 SmallString<128> Buffer2; 1536 llvm::raw_svector_ostream OS2(Buffer2); 1537 // Build debug location 1538 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1539 OS2 << ";" << PLoc.getFilename() << ";"; 1540 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1541 OS2 << FD->getQualifiedNameAsString(); 1542 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1543 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 1544 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 1545 } 1546 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 1547 CGF.EmitStoreOfScalar(OMPDebugLoc, PSource); 1548 1549 // Our callers always pass this to a runtime function, so for 1550 // convenience, go ahead and return a naked pointer. 
1551 return LocValue.getPointer(); 1552 } 1553 1554 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1555 SourceLocation Loc) { 1556 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1557 1558 llvm::Value *ThreadID = nullptr; 1559 // Check whether we've already cached a load of the thread id in this 1560 // function. 1561 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1562 if (I != OpenMPLocThreadIDMap.end()) { 1563 ThreadID = I->second.ThreadID; 1564 if (ThreadID != nullptr) 1565 return ThreadID; 1566 } 1567 // If exceptions are enabled, do not use parameter to avoid possible crash. 1568 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1569 !CGF.getLangOpts().CXXExceptions || 1570 CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 1571 if (auto *OMPRegionInfo = 1572 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1573 if (OMPRegionInfo->getThreadIDVariable()) { 1574 // Check if this an outlined function with thread id passed as argument. 1575 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1576 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1577 // If value loaded in entry block, cache it and use it everywhere in 1578 // function. 1579 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 1580 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1581 Elem.second.ThreadID = ThreadID; 1582 } 1583 return ThreadID; 1584 } 1585 } 1586 } 1587 1588 // This is not an outlined function region - need to call __kmpc_int32 1589 // kmpc_global_thread_num(ident_t *loc). 1590 // Generate thread id value and cache this value for use across the 1591 // function. 
1592 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1593 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 1594 llvm::CallInst *Call = CGF.Builder.CreateCall( 1595 createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1596 emitUpdateLocation(CGF, Loc)); 1597 Call->setCallingConv(CGF.getRuntimeCC()); 1598 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1599 Elem.second.ThreadID = Call; 1600 return Call; 1601 } 1602 1603 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1604 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1605 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 1606 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1607 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1608 for(auto *D : FunctionUDRMap[CGF.CurFn]) 1609 UDRMap.erase(D); 1610 FunctionUDRMap.erase(CGF.CurFn); 1611 } 1612 } 1613 1614 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1615 return IdentTy->getPointerTo(); 1616 } 1617 1618 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1619 if (!Kmpc_MicroTy) { 1620 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1621 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1622 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1623 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1624 } 1625 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1626 } 1627 1628 llvm::Constant * 1629 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1630 llvm::Constant *RTLFn = nullptr; 1631 switch (static_cast<OpenMPRTLFunction>(Function)) { 1632 case OMPRTL__kmpc_fork_call: { 1633 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1634 // microtask, ...); 1635 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1636 getKmpc_MicroPointerTy()}; 1637 auto *FnTy = 1638 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1639 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1640 break; 1641 } 1642 case OMPRTL__kmpc_global_thread_num: { 1643 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1644 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1645 auto *FnTy = 1646 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1647 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1648 break; 1649 } 1650 case OMPRTL__kmpc_threadprivate_cached: { 1651 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1652 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1653 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1654 CGM.VoidPtrTy, CGM.SizeTy, 1655 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1656 auto *FnTy = 1657 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1658 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1659 break; 1660 } 1661 case OMPRTL__kmpc_critical: { 1662 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1663 // kmp_critical_name *crit); 1664 llvm::Type *TypeParams[] = { 1665 getIdentTyPointerTy(), CGM.Int32Ty, 1666 
llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1667 auto *FnTy = 1668 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1669 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1670 break; 1671 } 1672 case OMPRTL__kmpc_critical_with_hint: { 1673 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1674 // kmp_critical_name *crit, uintptr_t hint); 1675 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1676 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1677 CGM.IntPtrTy}; 1678 auto *FnTy = 1679 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1680 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); 1681 break; 1682 } 1683 case OMPRTL__kmpc_threadprivate_register: { 1684 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1685 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1686 // typedef void *(*kmpc_ctor)(void *); 1687 auto *KmpcCtorTy = 1688 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1689 /*isVarArg*/ false)->getPointerTo(); 1690 // typedef void *(*kmpc_cctor)(void *, void *); 1691 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1692 auto *KmpcCopyCtorTy = 1693 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1694 /*isVarArg*/ false) 1695 ->getPointerTo(); 1696 // typedef void (*kmpc_dtor)(void *); 1697 auto *KmpcDtorTy = 1698 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1699 ->getPointerTo(); 1700 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1701 KmpcCopyCtorTy, KmpcDtorTy}; 1702 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1703 /*isVarArg*/ false); 1704 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1705 break; 1706 } 1707 case OMPRTL__kmpc_end_critical: { 1708 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1709 // kmp_critical_name *crit); 1710 llvm::Type *TypeParams[] = { 
1711 getIdentTyPointerTy(), CGM.Int32Ty, 1712 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1713 auto *FnTy = 1714 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1715 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1716 break; 1717 } 1718 case OMPRTL__kmpc_cancel_barrier: { 1719 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1720 // global_tid); 1721 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1722 auto *FnTy = 1723 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1724 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1725 break; 1726 } 1727 case OMPRTL__kmpc_barrier: { 1728 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1729 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1730 auto *FnTy = 1731 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1732 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1733 break; 1734 } 1735 case OMPRTL__kmpc_for_static_fini: { 1736 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1737 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1738 auto *FnTy = 1739 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1740 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1741 break; 1742 } 1743 case OMPRTL__kmpc_push_num_threads: { 1744 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1745 // kmp_int32 num_threads) 1746 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1747 CGM.Int32Ty}; 1748 auto *FnTy = 1749 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1750 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1751 break; 1752 } 1753 case OMPRTL__kmpc_serialized_parallel: { 1754 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1755 // global_tid); 1756 llvm::Type *TypeParams[] = 
{getIdentTyPointerTy(), CGM.Int32Ty}; 1757 auto *FnTy = 1758 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1759 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1760 break; 1761 } 1762 case OMPRTL__kmpc_end_serialized_parallel: { 1763 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1764 // global_tid); 1765 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1766 auto *FnTy = 1767 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1768 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1769 break; 1770 } 1771 case OMPRTL__kmpc_flush: { 1772 // Build void __kmpc_flush(ident_t *loc); 1773 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1774 auto *FnTy = 1775 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1776 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1777 break; 1778 } 1779 case OMPRTL__kmpc_master: { 1780 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1781 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1782 auto *FnTy = 1783 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1784 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1785 break; 1786 } 1787 case OMPRTL__kmpc_end_master: { 1788 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1789 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1790 auto *FnTy = 1791 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1792 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1793 break; 1794 } 1795 case OMPRTL__kmpc_omp_taskyield: { 1796 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1797 // int end_part); 1798 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1799 auto *FnTy = 1800 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1801 RTLFn = 
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1802 break; 1803 } 1804 case OMPRTL__kmpc_single: { 1805 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1806 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1807 auto *FnTy = 1808 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1809 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1810 break; 1811 } 1812 case OMPRTL__kmpc_end_single: { 1813 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1814 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1815 auto *FnTy = 1816 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1817 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1818 break; 1819 } 1820 case OMPRTL__kmpc_omp_task_alloc: { 1821 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1822 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1823 // kmp_routine_entry_t *task_entry); 1824 assert(KmpRoutineEntryPtrTy != nullptr && 1825 "Type kmp_routine_entry_t must be created."); 1826 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1827 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1828 // Return void * and then cast to particular kmp_task_t type. 
1829 auto *FnTy = 1830 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1831 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1832 break; 1833 } 1834 case OMPRTL__kmpc_omp_task: { 1835 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1836 // *new_task); 1837 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1838 CGM.VoidPtrTy}; 1839 auto *FnTy = 1840 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1841 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 1842 break; 1843 } 1844 case OMPRTL__kmpc_copyprivate: { 1845 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 1846 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 1847 // kmp_int32 didit); 1848 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1849 auto *CpyFnTy = 1850 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 1851 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 1852 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 1853 CGM.Int32Ty}; 1854 auto *FnTy = 1855 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1856 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 1857 break; 1858 } 1859 case OMPRTL__kmpc_reduce: { 1860 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 1861 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 1862 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 1863 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1864 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1865 /*isVarArg=*/false); 1866 llvm::Type *TypeParams[] = { 1867 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1868 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1869 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1870 auto *FnTy = 1871 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1872 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 1873 break; 1874 } 1875 case OMPRTL__kmpc_reduce_nowait: { 1876 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 1877 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 1878 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 1879 // *lck); 1880 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1881 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1882 /*isVarArg=*/false); 1883 llvm::Type *TypeParams[] = { 1884 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1885 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1886 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1887 auto *FnTy = 1888 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1889 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 1890 break; 1891 } 1892 case OMPRTL__kmpc_end_reduce: { 1893 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 1894 // kmp_critical_name *lck); 1895 llvm::Type *TypeParams[] = { 1896 getIdentTyPointerTy(), CGM.Int32Ty, 1897 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1898 auto *FnTy = 1899 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1900 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 1901 break; 1902 } 1903 case OMPRTL__kmpc_end_reduce_nowait: { 1904 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 1905 // kmp_critical_name *lck); 1906 llvm::Type *TypeParams[] = { 1907 getIdentTyPointerTy(), CGM.Int32Ty, 1908 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1909 auto *FnTy = 1910 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1911 RTLFn = 1912 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 1913 break; 1914 } 1915 case OMPRTL__kmpc_omp_task_begin_if0: { 
1916 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1917 // *new_task); 1918 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1919 CGM.VoidPtrTy}; 1920 auto *FnTy = 1921 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1922 RTLFn = 1923 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 1924 break; 1925 } 1926 case OMPRTL__kmpc_omp_task_complete_if0: { 1927 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1928 // *new_task); 1929 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1930 CGM.VoidPtrTy}; 1931 auto *FnTy = 1932 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1933 RTLFn = CGM.CreateRuntimeFunction(FnTy, 1934 /*Name=*/"__kmpc_omp_task_complete_if0"); 1935 break; 1936 } 1937 case OMPRTL__kmpc_ordered: { 1938 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 1939 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1940 auto *FnTy = 1941 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1942 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 1943 break; 1944 } 1945 case OMPRTL__kmpc_end_ordered: { 1946 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 1947 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1948 auto *FnTy = 1949 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1950 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 1951 break; 1952 } 1953 case OMPRTL__kmpc_omp_taskwait: { 1954 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 1955 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1956 auto *FnTy = 1957 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1958 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 1959 break; 1960 } 1961 case OMPRTL__kmpc_taskgroup: { 1962 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 
global_tid); 1963 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1964 auto *FnTy = 1965 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1966 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 1967 break; 1968 } 1969 case OMPRTL__kmpc_end_taskgroup: { 1970 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 1971 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1972 auto *FnTy = 1973 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1974 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 1975 break; 1976 } 1977 case OMPRTL__kmpc_push_proc_bind: { 1978 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 1979 // int proc_bind) 1980 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1981 auto *FnTy = 1982 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1983 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 1984 break; 1985 } 1986 case OMPRTL__kmpc_omp_task_with_deps: { 1987 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 1988 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 1989 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 1990 llvm::Type *TypeParams[] = { 1991 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 1992 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 1993 auto *FnTy = 1994 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1995 RTLFn = 1996 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 1997 break; 1998 } 1999 case OMPRTL__kmpc_omp_wait_deps: { 2000 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2001 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2002 // kmp_depend_info_t *noalias_dep_list); 2003 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2004 CGM.Int32Ty, CGM.VoidPtrTy, 2005 
CGM.Int32Ty, CGM.VoidPtrTy}; 2006 auto *FnTy = 2007 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2008 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2009 break; 2010 } 2011 case OMPRTL__kmpc_cancellationpoint: { 2012 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2013 // global_tid, kmp_int32 cncl_kind) 2014 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2015 auto *FnTy = 2016 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2017 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2018 break; 2019 } 2020 case OMPRTL__kmpc_cancel: { 2021 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2022 // kmp_int32 cncl_kind) 2023 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2024 auto *FnTy = 2025 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2026 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2027 break; 2028 } 2029 case OMPRTL__kmpc_push_num_teams: { 2030 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 2031 // kmp_int32 num_teams, kmp_int32 num_threads) 2032 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2033 CGM.Int32Ty}; 2034 auto *FnTy = 2035 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2036 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2037 break; 2038 } 2039 case OMPRTL__kmpc_fork_teams: { 2040 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2041 // microtask, ...); 2042 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2043 getKmpc_MicroPointerTy()}; 2044 auto *FnTy = 2045 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2046 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2047 break; 2048 } 2049 case OMPRTL__kmpc_taskloop: { 2050 // Build void __kmpc_taskloop(ident_t *loc, int gtid, 
kmp_task_t *task, int 2051 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 2052 // sched, kmp_uint64 grainsize, void *task_dup); 2053 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2054 CGM.IntTy, 2055 CGM.VoidPtrTy, 2056 CGM.IntTy, 2057 CGM.Int64Ty->getPointerTo(), 2058 CGM.Int64Ty->getPointerTo(), 2059 CGM.Int64Ty, 2060 CGM.IntTy, 2061 CGM.IntTy, 2062 CGM.Int64Ty, 2063 CGM.VoidPtrTy}; 2064 auto *FnTy = 2065 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2066 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 2067 break; 2068 } 2069 case OMPRTL__kmpc_doacross_init: { 2070 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 2071 // num_dims, struct kmp_dim *dims); 2072 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2073 CGM.Int32Ty, 2074 CGM.Int32Ty, 2075 CGM.VoidPtrTy}; 2076 auto *FnTy = 2077 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2078 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2079 break; 2080 } 2081 case OMPRTL__kmpc_doacross_fini: { 2082 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2083 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2084 auto *FnTy = 2085 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2086 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2087 break; 2088 } 2089 case OMPRTL__kmpc_doacross_post: { 2090 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2091 // *vec); 2092 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2093 CGM.Int64Ty->getPointerTo()}; 2094 auto *FnTy = 2095 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2096 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2097 break; 2098 } 2099 case OMPRTL__kmpc_doacross_wait: { 2100 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2101 // 
*vec); 2102 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2103 CGM.Int64Ty->getPointerTo()}; 2104 auto *FnTy = 2105 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2106 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2107 break; 2108 } 2109 case OMPRTL__kmpc_task_reduction_init: { 2110 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void 2111 // *data); 2112 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2113 auto *FnTy = 2114 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2115 RTLFn = 2116 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); 2117 break; 2118 } 2119 case OMPRTL__kmpc_task_reduction_get_th_data: { 2120 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2121 // *d); 2122 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2123 auto *FnTy = 2124 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2125 RTLFn = CGM.CreateRuntimeFunction( 2126 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2127 break; 2128 } 2129 case OMPRTL__tgt_target: { 2130 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2131 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2132 // *arg_types); 2133 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2134 CGM.VoidPtrTy, 2135 CGM.Int32Ty, 2136 CGM.VoidPtrPtrTy, 2137 CGM.VoidPtrPtrTy, 2138 CGM.SizeTy->getPointerTo(), 2139 CGM.Int64Ty->getPointerTo()}; 2140 auto *FnTy = 2141 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2142 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2143 break; 2144 } 2145 case OMPRTL__tgt_target_nowait: { 2146 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2147 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 2148 // int64_t *arg_types); 2149 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2150 
CGM.VoidPtrTy, 2151 CGM.Int32Ty, 2152 CGM.VoidPtrPtrTy, 2153 CGM.VoidPtrPtrTy, 2154 CGM.SizeTy->getPointerTo(), 2155 CGM.Int64Ty->getPointerTo()}; 2156 auto *FnTy = 2157 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2158 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2159 break; 2160 } 2161 case OMPRTL__tgt_target_teams: { 2162 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2163 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 2164 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2165 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2166 CGM.VoidPtrTy, 2167 CGM.Int32Ty, 2168 CGM.VoidPtrPtrTy, 2169 CGM.VoidPtrPtrTy, 2170 CGM.SizeTy->getPointerTo(), 2171 CGM.Int64Ty->getPointerTo(), 2172 CGM.Int32Ty, 2173 CGM.Int32Ty}; 2174 auto *FnTy = 2175 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2176 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2177 break; 2178 } 2179 case OMPRTL__tgt_target_teams_nowait: { 2180 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2181 // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t 2182 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2183 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2184 CGM.VoidPtrTy, 2185 CGM.Int32Ty, 2186 CGM.VoidPtrPtrTy, 2187 CGM.VoidPtrPtrTy, 2188 CGM.SizeTy->getPointerTo(), 2189 CGM.Int64Ty->getPointerTo(), 2190 CGM.Int32Ty, 2191 CGM.Int32Ty}; 2192 auto *FnTy = 2193 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2194 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2195 break; 2196 } 2197 case OMPRTL__tgt_register_lib: { 2198 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 2199 QualType ParamTy = 2200 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2201 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2202 auto *FnTy = 2203 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2204 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 2205 break; 2206 } 2207 case OMPRTL__tgt_unregister_lib: { 2208 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 2209 QualType ParamTy = 2210 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2211 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2212 auto *FnTy = 2213 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2214 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 2215 break; 2216 } 2217 case OMPRTL__tgt_target_data_begin: { 2218 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2219 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2220 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2221 CGM.Int32Ty, 2222 CGM.VoidPtrPtrTy, 2223 CGM.VoidPtrPtrTy, 2224 CGM.SizeTy->getPointerTo(), 2225 CGM.Int64Ty->getPointerTo()}; 2226 auto *FnTy = 2227 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2228 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2229 break; 2230 } 2231 case OMPRTL__tgt_target_data_begin_nowait: { 2232 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2233 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2234 // *arg_types); 2235 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2236 CGM.Int32Ty, 2237 CGM.VoidPtrPtrTy, 2238 CGM.VoidPtrPtrTy, 2239 CGM.SizeTy->getPointerTo(), 2240 CGM.Int64Ty->getPointerTo()}; 2241 auto *FnTy = 2242 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2243 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2244 break; 2245 } 2246 case OMPRTL__tgt_target_data_end: { 2247 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2248 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2249 llvm::Type *TypeParams[] = 
{CGM.Int64Ty, 2250 CGM.Int32Ty, 2251 CGM.VoidPtrPtrTy, 2252 CGM.VoidPtrPtrTy, 2253 CGM.SizeTy->getPointerTo(), 2254 CGM.Int64Ty->getPointerTo()}; 2255 auto *FnTy = 2256 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2257 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2258 break; 2259 } 2260 case OMPRTL__tgt_target_data_end_nowait: { 2261 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2262 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2263 // *arg_types); 2264 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2265 CGM.Int32Ty, 2266 CGM.VoidPtrPtrTy, 2267 CGM.VoidPtrPtrTy, 2268 CGM.SizeTy->getPointerTo(), 2269 CGM.Int64Ty->getPointerTo()}; 2270 auto *FnTy = 2271 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2272 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2273 break; 2274 } 2275 case OMPRTL__tgt_target_data_update: { 2276 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2277 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2278 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2279 CGM.Int32Ty, 2280 CGM.VoidPtrPtrTy, 2281 CGM.VoidPtrPtrTy, 2282 CGM.SizeTy->getPointerTo(), 2283 CGM.Int64Ty->getPointerTo()}; 2284 auto *FnTy = 2285 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2286 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2287 break; 2288 } 2289 case OMPRTL__tgt_target_data_update_nowait: { 2290 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2291 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2292 // *arg_types); 2293 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2294 CGM.Int32Ty, 2295 CGM.VoidPtrPtrTy, 2296 CGM.VoidPtrPtrTy, 2297 CGM.SizeTy->getPointerTo(), 2298 CGM.Int64Ty->getPointerTo()}; 2299 auto *FnTy = 2300 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2301 RTLFn = 
CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2302 break; 2303 } 2304 } 2305 assert(RTLFn && "Unable to find OpenMP runtime function"); 2306 return RTLFn; 2307 } 2308 2309 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, 2310 bool IVSigned) { 2311 assert((IVSize == 32 || IVSize == 64) && 2312 "IV size is not compatible with the omp runtime"); 2313 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2314 : "__kmpc_for_static_init_4u") 2315 : (IVSigned ? "__kmpc_for_static_init_8" 2316 : "__kmpc_for_static_init_8u"); 2317 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2318 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2319 llvm::Type *TypeParams[] = { 2320 getIdentTyPointerTy(), // loc 2321 CGM.Int32Ty, // tid 2322 CGM.Int32Ty, // schedtype 2323 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2324 PtrTy, // p_lower 2325 PtrTy, // p_upper 2326 PtrTy, // p_stride 2327 ITy, // incr 2328 ITy // chunk 2329 }; 2330 auto *FnTy = 2331 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2332 return CGM.CreateRuntimeFunction(FnTy, Name); 2333 } 2334 2335 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, 2336 bool IVSigned) { 2337 assert((IVSize == 32 || IVSize == 64) && 2338 "IV size is not compatible with the omp runtime"); 2339 StringRef Name = 2340 IVSize == 32 2341 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2342 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2343 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2344 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2345 CGM.Int32Ty, // tid 2346 CGM.Int32Ty, // schedtype 2347 ITy, // lower 2348 ITy, // upper 2349 ITy, // stride 2350 ITy // chunk 2351 }; 2352 auto *FnTy = 2353 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2354 return CGM.CreateRuntimeFunction(FnTy, Name); 2355 } 2356 2357 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, 2358 bool IVSigned) { 2359 assert((IVSize == 32 || IVSize == 64) && 2360 "IV size is not compatible with the omp runtime"); 2361 StringRef Name = 2362 IVSize == 32 2363 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2364 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2365 llvm::Type *TypeParams[] = { 2366 getIdentTyPointerTy(), // loc 2367 CGM.Int32Ty, // tid 2368 }; 2369 auto *FnTy = 2370 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2371 return CGM.CreateRuntimeFunction(FnTy, Name); 2372 } 2373 2374 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, 2375 bool IVSigned) { 2376 assert((IVSize == 32 || IVSize == 64) && 2377 "IV size is not compatible with the omp runtime"); 2378 StringRef Name = 2379 IVSize == 32 2380 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2381 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2382 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2383 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2384 llvm::Type *TypeParams[] = { 2385 getIdentTyPointerTy(), // loc 2386 CGM.Int32Ty, // tid 2387 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2388 PtrTy, // p_lower 2389 PtrTy, // p_upper 2390 PtrTy // p_stride 2391 }; 2392 auto *FnTy = 2393 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2394 return CGM.CreateRuntimeFunction(FnTy, Name); 2395 } 2396 2397 Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) { 2398 if (CGM.getLangOpts().OpenMPSimd) 2399 return Address::invalid(); 2400 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2401 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2402 if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) { 2403 SmallString<64> PtrName; 2404 { 2405 llvm::raw_svector_ostream OS(PtrName); 2406 OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr"; 2407 } 2408 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2409 if (!Ptr) { 2410 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2411 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2412 PtrName); 2413 if (!CGM.getLangOpts().OpenMPIsDevice) { 2414 auto *GV = cast<llvm::GlobalVariable>(Ptr); 2415 GV->setLinkage(llvm::GlobalValue::ExternalLinkage); 2416 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 2417 } 2418 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr)); 2419 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 2420 } 2421 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 2422 } 2423 return Address::invalid(); 2424 } 2425 2426 llvm::Constant * 2427 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2428 assert(!CGM.getLangOpts().OpenMPUseTLS || 2429 !CGM.getContext().getTargetInfo().isTLSSupported()); 2430 // Lookup the entry, lazily creating it if necessary. 
2431 std::string Suffix = getName({"cache", ""}); 2432 return getOrCreateInternalVariable( 2433 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 2434 } 2435 2436 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2437 const VarDecl *VD, 2438 Address VDAddr, 2439 SourceLocation Loc) { 2440 if (CGM.getLangOpts().OpenMPUseTLS && 2441 CGM.getContext().getTargetInfo().isTLSSupported()) 2442 return VDAddr; 2443 2444 llvm::Type *VarTy = VDAddr.getElementType(); 2445 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2446 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2447 CGM.Int8PtrTy), 2448 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2449 getOrCreateThreadPrivateCache(VD)}; 2450 return Address(CGF.EmitRuntimeCall( 2451 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2452 VDAddr.getAlignment()); 2453 } 2454 2455 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2456 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2457 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2458 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2459 // library. 2460 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 2461 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2462 OMPLoc); 2463 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2464 // to register constructor/destructor for variable. 
/// Emit the constructor/destructor helper functions for the threadprivate
/// variable \p VD and register them with the runtime through
/// emitThreadPrivateVarInit().
///
/// Nothing is emitted and nullptr is returned when native TLS is enabled and
/// supported, when \p VD has no definition, when this definition was already
/// processed, or when neither a constructor nor a destructor helper is needed.
///
/// \param VD Threadprivate variable; its definition is looked up before use.
/// \param VDAddr Address of the original variable. Its alignment is reused for
/// the pointer received by the helper functions.
/// \param Loc Location used for the generated functions and runtime calls.
/// \param PerformInit When true (and compiling C++), a constructor helper that
/// re-emits the variable's initializer is generated.
/// \param CGF When non-null, the registration call is emitted into this
/// function; when null, a standalone "__omp_threadprivate_init_" function is
/// created to hold it.
/// \return The standalone init function if one was created, nullptr otherwise.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With native TLS no runtime registration is emitted.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Process each definition at most once.
  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
    ThreadPrivateWithDefinition.insert(VD);
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      // NOTE(review): Init is dereferenced below without a null check; this
      // presumably relies on PerformInit implying an initializer - confirm.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // Signature: void *ctor(void *dst).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer and view it as a pointer to the
      // variable's memory type.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The helper returns the destination pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // Signature: void dtor(void *dst).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // A helper that was not generated is passed as a typed null pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function was supplied: wrap the registration call in a
      // standalone nullary init function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the supplied function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
2596 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 2597 unsigned &DeviceID, unsigned &FileID, 2598 unsigned &LineNum) { 2599 SourceManager &SM = C.getSourceManager(); 2600 2601 // The loc should be always valid and have a file ID (the user cannot use 2602 // #pragma directives in macros) 2603 2604 assert(Loc.isValid() && "Source location is expected to be always valid."); 2605 2606 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 2607 assert(PLoc.isValid() && "Source location is expected to be always valid."); 2608 2609 llvm::sys::fs::UniqueID ID; 2610 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 2611 SM.getDiagnostics().Report(diag::err_cannot_open_file) 2612 << PLoc.getFilename() << EC.message(); 2613 2614 DeviceID = ID.getDevice(); 2615 FileID = ID.getFile(); 2616 LineNum = PLoc.getLine(); 2617 } 2618 2619 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 2620 llvm::GlobalVariable *Addr, 2621 bool PerformInit) { 2622 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2623 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2624 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) 2625 return CGM.getLangOpts().OpenMPIsDevice; 2626 VD = VD->getDefinition(CGM.getContext()); 2627 if (VD && !DeclareTargetWithDefinition.insert(VD).second) 2628 return CGM.getLangOpts().OpenMPIsDevice; 2629 2630 QualType ASTTy = VD->getType(); 2631 2632 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 2633 // Produce the unique prefix to identify the new target regions. We use 2634 // the source location of the variable declaration which we know to not 2635 // conflict with any target region. 
2636 unsigned DeviceID; 2637 unsigned FileID; 2638 unsigned Line; 2639 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 2640 SmallString<128> Buffer, Out; 2641 { 2642 llvm::raw_svector_ostream OS(Buffer); 2643 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 2644 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 2645 } 2646 2647 const Expr *Init = VD->getAnyInitializer(); 2648 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2649 llvm::Constant *Ctor; 2650 llvm::Constant *ID; 2651 if (CGM.getLangOpts().OpenMPIsDevice) { 2652 // Generate function that re-emits the declaration's initializer into 2653 // the threadprivate copy of the variable VD 2654 CodeGenFunction CtorCGF(CGM); 2655 2656 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2657 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2658 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2659 FTy, Twine(Buffer, "_ctor"), FI, Loc); 2660 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 2661 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2662 FunctionArgList(), Loc, Loc); 2663 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 2664 CtorCGF.EmitAnyExprToMem(Init, 2665 Address(Addr, CGM.getContext().getDeclAlign(VD)), 2666 Init->getType().getQualifiers(), 2667 /*IsInitializer=*/true); 2668 CtorCGF.FinishFunction(); 2669 Ctor = Fn; 2670 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2671 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 2672 } else { 2673 Ctor = new llvm::GlobalVariable( 2674 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2675 llvm::GlobalValue::PrivateLinkage, 2676 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 2677 ID = Ctor; 2678 } 2679 2680 // Register the information for the entry associated with the constructor. 
2681 Out.clear(); 2682 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2683 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 2684 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 2685 } 2686 if (VD->getType().isDestructedType() != QualType::DK_none) { 2687 llvm::Constant *Dtor; 2688 llvm::Constant *ID; 2689 if (CGM.getLangOpts().OpenMPIsDevice) { 2690 // Generate function that emits destructor call for the threadprivate 2691 // copy of the variable VD 2692 CodeGenFunction DtorCGF(CGM); 2693 2694 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2695 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2696 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2697 FTy, Twine(Buffer, "_dtor"), FI, Loc); 2698 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2699 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2700 FunctionArgList(), Loc, Loc); 2701 // Create a scope with an artificial location for the body of this 2702 // function. 2703 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2704 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 2705 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2706 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2707 DtorCGF.FinishFunction(); 2708 Dtor = Fn; 2709 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2710 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 2711 } else { 2712 Dtor = new llvm::GlobalVariable( 2713 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2714 llvm::GlobalValue::PrivateLinkage, 2715 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 2716 ID = Dtor; 2717 } 2718 // Register the information for the entry associated with the destructor. 
2719 Out.clear(); 2720 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2721 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2722 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2723 } 2724 return CGM.getLangOpts().OpenMPIsDevice; 2725 } 2726 2727 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2728 QualType VarType, 2729 StringRef Name) { 2730 std::string Suffix = getName({"artificial", ""}); 2731 std::string CacheSuffix = getName({"cache", ""}); 2732 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2733 llvm::Value *GAddr = 2734 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2735 llvm::Value *Args[] = { 2736 emitUpdateLocation(CGF, SourceLocation()), 2737 getThreadID(CGF, SourceLocation()), 2738 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2739 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2740 /*IsSigned=*/false), 2741 getOrCreateInternalVariable( 2742 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2743 return Address( 2744 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2745 CGF.EmitRuntimeCall( 2746 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2747 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2748 CGM.getPointerAlign()); 2749 } 2750 2751 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 2752 const RegionCodeGenTy &ThenGen, 2753 const RegionCodeGenTy &ElseGen) { 2754 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2755 2756 // If the condition constant folds and can be elided, try to avoid emitting 2757 // the condition and the dead arm of the if/else. 2758 bool CondConstant; 2759 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2760 if (CondConstant) 2761 ThenGen(CGF); 2762 else 2763 ElseGen(CGF); 2764 return; 2765 } 2766 2767 // Otherwise, the condition did not fold, or we couldn't elide it. 
Just 2768 // emit the conditional branch. 2769 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2770 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2771 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2772 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2773 2774 // Emit the 'then' code. 2775 CGF.EmitBlock(ThenBlock); 2776 ThenGen(CGF); 2777 CGF.EmitBranch(ContBlock); 2778 // Emit the 'else' code if present. 2779 // There is no need to emit line number for unconditional branch. 2780 (void)ApplyDebugLocation::CreateEmpty(CGF); 2781 CGF.EmitBlock(ElseBlock); 2782 ElseGen(CGF); 2783 // There is no need to emit line number for unconditional branch. 2784 (void)ApplyDebugLocation::CreateEmpty(CGF); 2785 CGF.EmitBranch(ContBlock); 2786 // Emit the continuation block for code after the if. 2787 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2788 } 2789 2790 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2791 llvm::Value *OutlinedFn, 2792 ArrayRef<llvm::Value *> CapturedVars, 2793 const Expr *IfCond) { 2794 if (!CGF.HaveInsertPoint()) 2795 return; 2796 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2797 auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, 2798 PrePostActionTy &) { 2799 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2800 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2801 llvm::Value *Args[] = { 2802 RTLoc, 2803 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2804 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2805 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2806 RealArgs.append(std::begin(Args), std::end(Args)); 2807 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2808 2809 llvm::Value *RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); 2810 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2811 }; 2812 auto &&ElseGen = [OutlinedFn, CapturedVars, 
RTLoc, Loc](CodeGenFunction &CGF, 2813 PrePostActionTy &) { 2814 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2815 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2816 // Build calls: 2817 // __kmpc_serialized_parallel(&Loc, GTid); 2818 llvm::Value *Args[] = {RTLoc, ThreadID}; 2819 CGF.EmitRuntimeCall( 2820 RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); 2821 2822 // OutlinedFn(>id, &zero, CapturedStruct); 2823 Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 2824 /*Name*/ ".zero.addr"); 2825 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 2826 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 2827 // ThreadId for serialized parallels is 0. 2828 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 2829 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 2830 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2831 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2832 2833 // __kmpc_end_serialized_parallel(&Loc, GTid); 2834 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2835 CGF.EmitRuntimeCall( 2836 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 2837 EndArgs); 2838 }; 2839 if (IfCond) { 2840 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 2841 } else { 2842 RegionCodeGenTy ThenRCG(ThenGen); 2843 ThenRCG(CGF); 2844 } 2845 } 2846 2847 // If we're inside an (outlined) parallel region, use the region info's 2848 // thread-ID variable (it is passed in a first argument of the outlined function 2849 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2850 // regular serial code region, get thread ID by calling kmp_int32 2851 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2852 // return the address of that temp. 
2853 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2854 SourceLocation Loc) { 2855 if (auto *OMPRegionInfo = 2856 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2857 if (OMPRegionInfo->getThreadIDVariable()) 2858 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 2859 2860 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2861 QualType Int32Ty = 2862 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2863 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2864 CGF.EmitStoreOfScalar(ThreadID, 2865 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2866 2867 return ThreadIDTemp; 2868 } 2869 2870 llvm::Constant * 2871 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, 2872 const llvm::Twine &Name) { 2873 SmallString<256> Buffer; 2874 llvm::raw_svector_ostream Out(Buffer); 2875 Out << Name; 2876 StringRef RuntimeName = Out.str(); 2877 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2878 if (Elem.second) { 2879 assert(Elem.second->getType()->getPointerElementType() == Ty && 2880 "OMP internal variable has different type than requested"); 2881 return &*Elem.second; 2882 } 2883 2884 return Elem.second = new llvm::GlobalVariable( 2885 CGM.getModule(), Ty, /*IsConstant*/ false, 2886 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2887 Elem.first()); 2888 } 2889 2890 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2891 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2892 std::string Name = getName({Prefix, "var"}); 2893 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2894 } 2895 2896 namespace { 2897 /// Common pre(post)-action for different OpenMP constructs. 
2898 class CommonActionTy final : public PrePostActionTy { 2899 llvm::Value *EnterCallee; 2900 ArrayRef<llvm::Value *> EnterArgs; 2901 llvm::Value *ExitCallee; 2902 ArrayRef<llvm::Value *> ExitArgs; 2903 bool Conditional; 2904 llvm::BasicBlock *ContBlock = nullptr; 2905 2906 public: 2907 CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs, 2908 llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs, 2909 bool Conditional = false) 2910 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2911 ExitArgs(ExitArgs), Conditional(Conditional) {} 2912 void Enter(CodeGenFunction &CGF) override { 2913 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2914 if (Conditional) { 2915 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2916 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2917 ContBlock = CGF.createBasicBlock("omp_if.end"); 2918 // Generate the branch (If-stmt) 2919 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2920 CGF.EmitBlock(ThenBlock); 2921 } 2922 } 2923 void Done(CodeGenFunction &CGF) { 2924 // Emit the rest of blocks/branches 2925 CGF.EmitBranch(ContBlock); 2926 CGF.EmitBlock(ContBlock, true); 2927 } 2928 void Exit(CodeGenFunction &CGF) override { 2929 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2930 } 2931 }; 2932 } // anonymous namespace 2933 2934 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2935 StringRef CriticalName, 2936 const RegionCodeGenTy &CriticalOpGen, 2937 SourceLocation Loc, const Expr *Hint) { 2938 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2939 // CriticalOpGen(); 2940 // __kmpc_end_critical(ident_t *, gtid, Lock); 2941 // Prepare arguments and build a call to __kmpc_critical 2942 if (!CGF.HaveInsertPoint()) 2943 return; 2944 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2945 getCriticalRegionLock(CriticalName)}; 2946 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 
2947 std::end(Args)); 2948 if (Hint) { 2949 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2950 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); 2951 } 2952 CommonActionTy Action( 2953 createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint 2954 : OMPRTL__kmpc_critical), 2955 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); 2956 CriticalOpGen.setAction(Action); 2957 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2958 } 2959 2960 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2961 const RegionCodeGenTy &MasterOpGen, 2962 SourceLocation Loc) { 2963 if (!CGF.HaveInsertPoint()) 2964 return; 2965 // if(__kmpc_master(ident_t *, gtid)) { 2966 // MasterOpGen(); 2967 // __kmpc_end_master(ident_t *, gtid); 2968 // } 2969 // Prepare arguments and build a call to __kmpc_master 2970 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2971 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, 2972 createRuntimeFunction(OMPRTL__kmpc_end_master), Args, 2973 /*Conditional=*/true); 2974 MasterOpGen.setAction(Action); 2975 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2976 Action.Done(CGF); 2977 } 2978 2979 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2980 SourceLocation Loc) { 2981 if (!CGF.HaveInsertPoint()) 2982 return; 2983 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2984 llvm::Value *Args[] = { 2985 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2986 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2987 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 2988 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2989 Region->emitUntiedSwitch(CGF); 2990 } 2991 2992 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2993 const RegionCodeGenTy &TaskgroupOpGen, 2994 SourceLocation Loc) { 2995 if (!CGF.HaveInsertPoint()) 2996 return; 2997 // 
__kmpc_taskgroup(ident_t *, gtid); 2998 // TaskgroupOpGen(); 2999 // __kmpc_end_taskgroup(ident_t *, gtid); 3000 // Prepare arguments and build a call to __kmpc_taskgroup 3001 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3002 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, 3003 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 3004 Args); 3005 TaskgroupOpGen.setAction(Action); 3006 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 3007 } 3008 3009 /// Given an array of pointers to variables, project the address of a 3010 /// given variable. 3011 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 3012 unsigned Index, const VarDecl *Var) { 3013 // Pull out the pointer to the variable. 3014 Address PtrAddr = 3015 CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize()); 3016 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 3017 3018 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 3019 Addr = CGF.Builder.CreateElementBitCast( 3020 Addr, CGF.ConvertTypeForMem(Var->getType())); 3021 return Addr; 3022 } 3023 3024 static llvm::Value *emitCopyprivateCopyFunction( 3025 CodeGenModule &CGM, llvm::Type *ArgsType, 3026 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 3027 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 3028 SourceLocation Loc) { 3029 ASTContext &C = CGM.getContext(); 3030 // void copy_func(void *LHSArg, void *RHSArg); 3031 FunctionArgList Args; 3032 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3033 ImplicitParamDecl::Other); 3034 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3035 ImplicitParamDecl::Other); 3036 Args.push_back(&LHSArg); 3037 Args.push_back(&RHSArg); 3038 const auto &CGFI = 3039 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3040 std::string Name = 3041 CGM.getOpenMPRuntime().getName({"omp", 
"copyprivate", "copy_func"}); 3042 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 3043 llvm::GlobalValue::InternalLinkage, Name, 3044 &CGM.getModule()); 3045 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 3046 Fn->setDoesNotRecurse(); 3047 CodeGenFunction CGF(CGM); 3048 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 3049 // Dest = (void*[n])(LHSArg); 3050 // Src = (void*[n])(RHSArg); 3051 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3052 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 3053 ArgsType), CGF.getPointerAlign()); 3054 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3055 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 3056 ArgsType), CGF.getPointerAlign()); 3057 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 3058 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 3059 // ... 3060 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 3061 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 3062 const auto *DestVar = 3063 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 3064 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 3065 3066 const auto *SrcVar = 3067 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 3068 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 3069 3070 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 3071 QualType Type = VD->getType(); 3072 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 3073 } 3074 CGF.FinishFunction(); 3075 return Fn; 3076 } 3077 3078 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 3079 const RegionCodeGenTy &SingleOpGen, 3080 SourceLocation Loc, 3081 ArrayRef<const Expr *> CopyprivateVars, 3082 ArrayRef<const Expr *> SrcExprs, 3083 ArrayRef<const Expr *> DstExprs, 3084 ArrayRef<const Expr *> AssignmentOps) { 3085 if (!CGF.HaveInsertPoint()) 3086 return; 3087 assert(CopyprivateVars.size() == SrcExprs.size() && 3088 CopyprivateVars.size() == 
DstExprs.size() && 3089 CopyprivateVars.size() == AssignmentOps.size()); 3090 ASTContext &C = CGM.getContext(); 3091 // int32 did_it = 0; 3092 // if(__kmpc_single(ident_t *, gtid)) { 3093 // SingleOpGen(); 3094 // __kmpc_end_single(ident_t *, gtid); 3095 // did_it = 1; 3096 // } 3097 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3098 // <copy_func>, did_it); 3099 3100 Address DidIt = Address::invalid(); 3101 if (!CopyprivateVars.empty()) { 3102 // int32 did_it = 0; 3103 QualType KmpInt32Ty = 3104 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 3105 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 3106 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 3107 } 3108 // Prepare arguments and build a call to __kmpc_single 3109 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3110 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, 3111 createRuntimeFunction(OMPRTL__kmpc_end_single), Args, 3112 /*Conditional=*/true); 3113 SingleOpGen.setAction(Action); 3114 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 3115 if (DidIt.isValid()) { 3116 // did_it = 1; 3117 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 3118 } 3119 Action.Done(CGF); 3120 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3121 // <copy_func>, did_it); 3122 if (DidIt.isValid()) { 3123 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 3124 QualType CopyprivateArrayTy = 3125 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 3126 /*IndexTypeQuals=*/0); 3127 // Create a list of all private variables for copyprivate. 
3128 Address CopyprivateList = 3129 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 3130 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 3131 Address Elem = CGF.Builder.CreateConstArrayGEP( 3132 CopyprivateList, I, CGF.getPointerSize()); 3133 CGF.Builder.CreateStore( 3134 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3135 CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), 3136 Elem); 3137 } 3138 // Build function that copies private values from single region to all other 3139 // threads in the corresponding parallel region. 3140 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 3141 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 3142 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 3143 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 3144 Address CL = 3145 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 3146 CGF.VoidPtrTy); 3147 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 3148 llvm::Value *Args[] = { 3149 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 3150 getThreadID(CGF, Loc), // i32 <gtid> 3151 BufSize, // size_t <buf_size> 3152 CL.getPointer(), // void *<copyprivate list> 3153 CpyFn, // void (*) (void *, void *) <copy_func> 3154 DidItVal // i32 did_it 3155 }; 3156 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 3157 } 3158 } 3159 3160 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 3161 const RegionCodeGenTy &OrderedOpGen, 3162 SourceLocation Loc, bool IsThreads) { 3163 if (!CGF.HaveInsertPoint()) 3164 return; 3165 // __kmpc_ordered(ident_t *, gtid); 3166 // OrderedOpGen(); 3167 // __kmpc_end_ordered(ident_t *, gtid); 3168 // Prepare arguments and build a call to __kmpc_ordered 3169 if (IsThreads) { 3170 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3171 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, 3172 
createRuntimeFunction(OMPRTL__kmpc_end_ordered), 3173 Args); 3174 OrderedOpGen.setAction(Action); 3175 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3176 return; 3177 } 3178 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3179 } 3180 3181 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 3182 OpenMPDirectiveKind Kind, bool EmitChecks, 3183 bool ForceSimpleCall) { 3184 if (!CGF.HaveInsertPoint()) 3185 return; 3186 // Build call __kmpc_cancel_barrier(loc, thread_id); 3187 // Build call __kmpc_barrier(loc, thread_id); 3188 unsigned Flags; 3189 if (Kind == OMPD_for) 3190 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 3191 else if (Kind == OMPD_sections) 3192 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 3193 else if (Kind == OMPD_single) 3194 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 3195 else if (Kind == OMPD_barrier) 3196 Flags = OMP_IDENT_BARRIER_EXPL; 3197 else 3198 Flags = OMP_IDENT_BARRIER_IMPL; 3199 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 3200 // thread_id); 3201 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 3202 getThreadID(CGF, Loc)}; 3203 if (auto *OMPRegionInfo = 3204 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 3205 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 3206 llvm::Value *Result = CGF.EmitRuntimeCall( 3207 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 3208 if (EmitChecks) { 3209 // if (__kmpc_cancel_barrier()) { 3210 // exit from construct; 3211 // } 3212 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 3213 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 3214 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 3215 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 3216 CGF.EmitBlock(ExitBB); 3217 // exit from construct; 3218 CodeGenFunction::JumpDest CancelDestination = 3219 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 3220 CGF.EmitBranchThroughCleanup(CancelDestination); 3221 
CGF.EmitBlock(ContBB, /*IsFinished=*/true); 3222 } 3223 return; 3224 } 3225 } 3226 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 3227 } 3228 3229 /// Map the OpenMP loop schedule to the runtime enumeration. 3230 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 3231 bool Chunked, bool Ordered) { 3232 switch (ScheduleKind) { 3233 case OMPC_SCHEDULE_static: 3234 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 3235 : (Ordered ? OMP_ord_static : OMP_sch_static); 3236 case OMPC_SCHEDULE_dynamic: 3237 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 3238 case OMPC_SCHEDULE_guided: 3239 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 3240 case OMPC_SCHEDULE_runtime: 3241 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 3242 case OMPC_SCHEDULE_auto: 3243 return Ordered ? OMP_ord_auto : OMP_sch_auto; 3244 case OMPC_SCHEDULE_unknown: 3245 assert(!Chunked && "chunk was specified but schedule kind not known"); 3246 return Ordered ? OMP_ord_static : OMP_sch_static; 3247 } 3248 llvm_unreachable("Unexpected runtime schedule"); 3249 } 3250 3251 /// Map the OpenMP distribute schedule to the runtime enumeration. 3252 static OpenMPSchedType 3253 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 3254 // only static is allowed for dist_schedule 3255 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 3256 } 3257 3258 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 3259 bool Chunked) const { 3260 OpenMPSchedType Schedule = 3261 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3262 return Schedule == OMP_sch_static; 3263 } 3264 3265 bool CGOpenMPRuntime::isStaticNonchunked( 3266 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3267 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3268 return Schedule == OMP_dist_sch_static; 3269 } 3270 3271 3272 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 3273 OpenMPSchedType Schedule = 3274 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 3275 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 3276 return Schedule != OMP_sch_static; 3277 } 3278 3279 static int addMonoNonMonoModifier(OpenMPSchedType Schedule, 3280 OpenMPScheduleClauseModifier M1, 3281 OpenMPScheduleClauseModifier M2) { 3282 int Modifier = 0; 3283 switch (M1) { 3284 case OMPC_SCHEDULE_MODIFIER_monotonic: 3285 Modifier = OMP_sch_modifier_monotonic; 3286 break; 3287 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3288 Modifier = OMP_sch_modifier_nonmonotonic; 3289 break; 3290 case OMPC_SCHEDULE_MODIFIER_simd: 3291 if (Schedule == OMP_sch_static_chunked) 3292 Schedule = OMP_sch_static_balanced_chunked; 3293 break; 3294 case OMPC_SCHEDULE_MODIFIER_last: 3295 case OMPC_SCHEDULE_MODIFIER_unknown: 3296 break; 3297 } 3298 switch (M2) { 3299 case OMPC_SCHEDULE_MODIFIER_monotonic: 3300 Modifier = OMP_sch_modifier_monotonic; 3301 break; 3302 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3303 Modifier = OMP_sch_modifier_nonmonotonic; 3304 break; 3305 case OMPC_SCHEDULE_MODIFIER_simd: 3306 if (Schedule == OMP_sch_static_chunked) 3307 Schedule = OMP_sch_static_balanced_chunked; 3308 break; 3309 case OMPC_SCHEDULE_MODIFIER_last: 3310 case OMPC_SCHEDULE_MODIFIER_unknown: 3311 
break; 3312 } 3313 return Schedule | Modifier; 3314 } 3315 3316 void CGOpenMPRuntime::emitForDispatchInit( 3317 CodeGenFunction &CGF, SourceLocation Loc, 3318 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 3319 bool Ordered, const DispatchRTInput &DispatchValues) { 3320 if (!CGF.HaveInsertPoint()) 3321 return; 3322 OpenMPSchedType Schedule = getRuntimeSchedule( 3323 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 3324 assert(Ordered || 3325 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 3326 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 3327 Schedule != OMP_sch_static_balanced_chunked)); 3328 // Call __kmpc_dispatch_init( 3329 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 3330 // kmp_int[32|64] lower, kmp_int[32|64] upper, 3331 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 3332 3333 // If the Chunk was not specified in the clause - use default value 1. 3334 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 3335 : CGF.Builder.getIntN(IVSize, 1); 3336 llvm::Value *Args[] = { 3337 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3338 CGF.Builder.getInt32(addMonoNonMonoModifier( 3339 Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 3340 DispatchValues.LB, // Lower 3341 DispatchValues.UB, // Upper 3342 CGF.Builder.getIntN(IVSize, 1), // Stride 3343 Chunk // Chunk 3344 }; 3345 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 3346 } 3347 3348 static void emitForStaticInitCall( 3349 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 3350 llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule, 3351 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 3352 const CGOpenMPRuntime::StaticRTInput &Values) { 3353 if (!CGF.HaveInsertPoint()) 3354 return; 3355 3356 assert(!Values.Ordered); 3357 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 3358 Schedule == OMP_sch_static_balanced_chunked || 3359 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 3360 Schedule == OMP_dist_sch_static || 3361 Schedule == OMP_dist_sch_static_chunked); 3362 3363 // Call __kmpc_for_static_init( 3364 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 3365 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 3366 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 3367 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 3368 llvm::Value *Chunk = Values.Chunk; 3369 if (Chunk == nullptr) { 3370 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 3371 Schedule == OMP_dist_sch_static) && 3372 "expected static non-chunked schedule"); 3373 // If the Chunk was not specified in the clause - use default value 1. 
3374 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 3375 } else { 3376 assert((Schedule == OMP_sch_static_chunked || 3377 Schedule == OMP_sch_static_balanced_chunked || 3378 Schedule == OMP_ord_static_chunked || 3379 Schedule == OMP_dist_sch_static_chunked) && 3380 "expected static chunked schedule"); 3381 } 3382 llvm::Value *Args[] = { 3383 UpdateLocation, 3384 ThreadId, 3385 CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1, 3386 M2)), // Schedule type 3387 Values.IL.getPointer(), // &isLastIter 3388 Values.LB.getPointer(), // &LB 3389 Values.UB.getPointer(), // &UB 3390 Values.ST.getPointer(), // &Stride 3391 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 3392 Chunk // Chunk 3393 }; 3394 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 3395 } 3396 3397 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 3398 SourceLocation Loc, 3399 OpenMPDirectiveKind DKind, 3400 const OpenMPScheduleTy &ScheduleKind, 3401 const StaticRTInput &Values) { 3402 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 3403 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 3404 assert(isOpenMPWorksharingDirective(DKind) && 3405 "Expected loop-based or sections-based directive."); 3406 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 3407 isOpenMPLoopDirective(DKind) 3408 ? 
OMP_IDENT_WORK_LOOP 3409 : OMP_IDENT_WORK_SECTIONS); 3410 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3411 llvm::Constant *StaticInitFunction = 3412 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3413 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3414 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 3415 } 3416 3417 void CGOpenMPRuntime::emitDistributeStaticInit( 3418 CodeGenFunction &CGF, SourceLocation Loc, 3419 OpenMPDistScheduleClauseKind SchedKind, 3420 const CGOpenMPRuntime::StaticRTInput &Values) { 3421 OpenMPSchedType ScheduleNum = 3422 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 3423 llvm::Value *UpdatedLocation = 3424 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 3425 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3426 llvm::Constant *StaticInitFunction = 3427 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3428 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3429 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 3430 OMPC_SCHEDULE_MODIFIER_unknown, Values); 3431 } 3432 3433 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 3434 SourceLocation Loc, 3435 OpenMPDirectiveKind DKind) { 3436 if (!CGF.HaveInsertPoint()) 3437 return; 3438 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 3439 llvm::Value *Args[] = { 3440 emitUpdateLocation(CGF, Loc, 3441 isOpenMPDistributeDirective(DKind) 3442 ? OMP_IDENT_WORK_DISTRIBUTE 3443 : isOpenMPLoopDirective(DKind) 3444 ? 
OMP_IDENT_WORK_LOOP 3445 : OMP_IDENT_WORK_SECTIONS), 3446 getThreadID(CGF, Loc)}; 3447 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 3448 Args); 3449 } 3450 3451 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 3452 SourceLocation Loc, 3453 unsigned IVSize, 3454 bool IVSigned) { 3455 if (!CGF.HaveInsertPoint()) 3456 return; 3457 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 3458 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3459 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 3460 } 3461 3462 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 3463 SourceLocation Loc, unsigned IVSize, 3464 bool IVSigned, Address IL, 3465 Address LB, Address UB, 3466 Address ST) { 3467 // Call __kmpc_dispatch_next( 3468 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 3469 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 3470 // kmp_int[32|64] *p_stride); 3471 llvm::Value *Args[] = { 3472 emitUpdateLocation(CGF, Loc), 3473 getThreadID(CGF, Loc), 3474 IL.getPointer(), // &isLastIter 3475 LB.getPointer(), // &Lower 3476 UB.getPointer(), // &Upper 3477 ST.getPointer() // &Stride 3478 }; 3479 llvm::Value *Call = 3480 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 3481 return CGF.EmitScalarConversion( 3482 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 3483 CGF.getContext().BoolTy, Loc); 3484 } 3485 3486 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 3487 llvm::Value *NumThreads, 3488 SourceLocation Loc) { 3489 if (!CGF.HaveInsertPoint()) 3490 return; 3491 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 3492 llvm::Value *Args[] = { 3493 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3494 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 3495 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 3496 Args); 3497 
} 3498 3499 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 3500 OpenMPProcBindClauseKind ProcBind, 3501 SourceLocation Loc) { 3502 if (!CGF.HaveInsertPoint()) 3503 return; 3504 // Constants for proc bind value accepted by the runtime. 3505 enum ProcBindTy { 3506 ProcBindFalse = 0, 3507 ProcBindTrue, 3508 ProcBindMaster, 3509 ProcBindClose, 3510 ProcBindSpread, 3511 ProcBindIntel, 3512 ProcBindDefault 3513 } RuntimeProcBind; 3514 switch (ProcBind) { 3515 case OMPC_PROC_BIND_master: 3516 RuntimeProcBind = ProcBindMaster; 3517 break; 3518 case OMPC_PROC_BIND_close: 3519 RuntimeProcBind = ProcBindClose; 3520 break; 3521 case OMPC_PROC_BIND_spread: 3522 RuntimeProcBind = ProcBindSpread; 3523 break; 3524 case OMPC_PROC_BIND_unknown: 3525 llvm_unreachable("Unsupported proc_bind value."); 3526 } 3527 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 3528 llvm::Value *Args[] = { 3529 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3530 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 3531 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 3532 } 3533 3534 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 3535 SourceLocation Loc) { 3536 if (!CGF.HaveInsertPoint()) 3537 return; 3538 // Build call void __kmpc_flush(ident_t *loc) 3539 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 3540 emitUpdateLocation(CGF, Loc)); 3541 } 3542 3543 namespace { 3544 /// Indexes of fields for type kmp_task_t. 3545 enum KmpTaskTFields { 3546 /// List of shared variables. 3547 KmpTaskTShareds, 3548 /// Task routine. 3549 KmpTaskTRoutine, 3550 /// Partition id for the untied tasks. 3551 KmpTaskTPartId, 3552 /// Function with call of destructors for private variables. 3553 Data1, 3554 /// Task priority. 3555 Data2, 3556 /// (Taskloops only) Lower bound. 3557 KmpTaskTLowerBound, 3558 /// (Taskloops only) Upper bound. 
3559 KmpTaskTUpperBound, 3560 /// (Taskloops only) Stride. 3561 KmpTaskTStride, 3562 /// (Taskloops only) Is last iteration flag. 3563 KmpTaskTLastIter, 3564 /// (Taskloops only) Reduction data. 3565 KmpTaskTReductions, 3566 }; 3567 } // anonymous namespace 3568 3569 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 3570 return OffloadEntriesTargetRegion.empty() && 3571 OffloadEntriesDeviceGlobalVar.empty(); 3572 } 3573 3574 /// Initialize target region entry. 3575 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3576 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3577 StringRef ParentName, unsigned LineNum, 3578 unsigned Order) { 3579 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3580 "only required for the device " 3581 "code generation."); 3582 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3583 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3584 OMPTargetRegionEntryTargetRegion); 3585 ++OffloadingEntriesNum; 3586 } 3587 3588 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3589 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3590 StringRef ParentName, unsigned LineNum, 3591 llvm::Constant *Addr, llvm::Constant *ID, 3592 OMPTargetRegionEntryKind Flags) { 3593 // If we are emitting code for a target, the entry is already initialized, 3594 // only has to be registered. 
3595 if (CGM.getLangOpts().OpenMPIsDevice) { 3596 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { 3597 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3598 DiagnosticsEngine::Error, 3599 "Unable to find target region on line '%0' in the device code."); 3600 CGM.getDiags().Report(DiagID) << LineNum; 3601 return; 3602 } 3603 auto &Entry = 3604 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3605 assert(Entry.isValid() && "Entry not initialized!"); 3606 Entry.setAddress(Addr); 3607 Entry.setID(ID); 3608 Entry.setFlags(Flags); 3609 } else { 3610 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3611 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3612 ++OffloadingEntriesNum; 3613 } 3614 } 3615 3616 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3617 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3618 unsigned LineNum) const { 3619 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3620 if (PerDevice == OffloadEntriesTargetRegion.end()) 3621 return false; 3622 auto PerFile = PerDevice->second.find(FileID); 3623 if (PerFile == PerDevice->second.end()) 3624 return false; 3625 auto PerParentName = PerFile->second.find(ParentName); 3626 if (PerParentName == PerFile->second.end()) 3627 return false; 3628 auto PerLine = PerParentName->second.find(LineNum); 3629 if (PerLine == PerParentName->second.end()) 3630 return false; 3631 // Fail if this entry is already registered. 3632 if (PerLine->second.getAddress() || PerLine->second.getID()) 3633 return false; 3634 return true; 3635 } 3636 3637 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3638 const OffloadTargetRegionEntryInfoActTy &Action) { 3639 // Scan all target region entries and perform the provided action. 
3640 for (const auto &D : OffloadEntriesTargetRegion) 3641 for (const auto &F : D.second) 3642 for (const auto &P : F.second) 3643 for (const auto &L : P.second) 3644 Action(D.first, F.first, P.first(), L.first, L.second); 3645 } 3646 3647 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3648 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3649 OMPTargetGlobalVarEntryKind Flags, 3650 unsigned Order) { 3651 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3652 "only required for the device " 3653 "code generation."); 3654 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3655 ++OffloadingEntriesNum; 3656 } 3657 3658 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3659 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3660 CharUnits VarSize, 3661 OMPTargetGlobalVarEntryKind Flags, 3662 llvm::GlobalValue::LinkageTypes Linkage) { 3663 if (CGM.getLangOpts().OpenMPIsDevice) { 3664 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3665 assert(Entry.isValid() && Entry.getFlags() == Flags && 3666 "Entry not initialized!"); 3667 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3668 "Resetting with the new address."); 3669 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) 3670 return; 3671 Entry.setAddress(Addr); 3672 Entry.setVarSize(VarSize); 3673 Entry.setLinkage(Linkage); 3674 } else { 3675 if (hasDeviceGlobalVarEntryInfo(VarName)) 3676 return; 3677 OffloadEntriesDeviceGlobalVar.try_emplace( 3678 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3679 ++OffloadingEntriesNum; 3680 } 3681 } 3682 3683 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3684 actOnDeviceGlobalVarEntriesInfo( 3685 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3686 // Scan all target region entries and perform the provided action. 
3687 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3688 Action(E.getKey(), E.getValue()); 3689 } 3690 3691 llvm::Function * 3692 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { 3693 // If we don't have entries or if we are emitting code for the device, we 3694 // don't need to do anything. 3695 if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) 3696 return nullptr; 3697 3698 llvm::Module &M = CGM.getModule(); 3699 ASTContext &C = CGM.getContext(); 3700 3701 // Get list of devices we care about 3702 const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples; 3703 3704 // We should be creating an offloading descriptor only if there are devices 3705 // specified. 3706 assert(!Devices.empty() && "No OpenMP offloading devices??"); 3707 3708 // Create the external variables that will point to the begin and end of the 3709 // host entries section. These will be defined by the linker. 3710 llvm::Type *OffloadEntryTy = 3711 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); 3712 std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"}); 3713 auto *HostEntriesBegin = new llvm::GlobalVariable( 3714 M, OffloadEntryTy, /*isConstant=*/true, 3715 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 3716 EntriesBeginName); 3717 std::string EntriesEndName = getName({"omp_offloading", "entries_end"}); 3718 auto *HostEntriesEnd = 3719 new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true, 3720 llvm::GlobalValue::ExternalLinkage, 3721 /*Initializer=*/nullptr, EntriesEndName); 3722 3723 // Create all device images 3724 auto *DeviceImageTy = cast<llvm::StructType>( 3725 CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); 3726 ConstantInitBuilder DeviceImagesBuilder(CGM); 3727 ConstantArrayBuilder DeviceImagesEntries = 3728 DeviceImagesBuilder.beginArray(DeviceImageTy); 3729 3730 for (const llvm::Triple &Device : Devices) { 3731 StringRef T = Device.getTriple(); 3732 
std::string BeginName = getName({"omp_offloading", "img_start", ""}); 3733 auto *ImgBegin = new llvm::GlobalVariable( 3734 M, CGM.Int8Ty, /*isConstant=*/true, 3735 llvm::GlobalValue::ExternalWeakLinkage, 3736 /*Initializer=*/nullptr, Twine(BeginName).concat(T)); 3737 std::string EndName = getName({"omp_offloading", "img_end", ""}); 3738 auto *ImgEnd = new llvm::GlobalVariable( 3739 M, CGM.Int8Ty, /*isConstant=*/true, 3740 llvm::GlobalValue::ExternalWeakLinkage, 3741 /*Initializer=*/nullptr, Twine(EndName).concat(T)); 3742 3743 llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin, 3744 HostEntriesEnd}; 3745 createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data, 3746 DeviceImagesEntries); 3747 } 3748 3749 // Create device images global array. 3750 std::string ImagesName = getName({"omp_offloading", "device_images"}); 3751 llvm::GlobalVariable *DeviceImages = 3752 DeviceImagesEntries.finishAndCreateGlobal(ImagesName, 3753 CGM.getPointerAlign(), 3754 /*isConstant=*/true); 3755 DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3756 3757 // This is a Zero array to be used in the creation of the constant expressions 3758 llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), 3759 llvm::Constant::getNullValue(CGM.Int32Ty)}; 3760 3761 // Create the target region descriptor. 3762 llvm::Constant *Data[] = { 3763 llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()), 3764 llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), 3765 DeviceImages, Index), 3766 HostEntriesBegin, HostEntriesEnd}; 3767 std::string Descriptor = getName({"omp_offloading", "descriptor"}); 3768 llvm::GlobalVariable *Desc = createConstantGlobalStruct( 3769 CGM, getTgtBinaryDescriptorQTy(), Data, Descriptor); 3770 3771 // Emit code to register or unregister the descriptor at execution 3772 // startup or closing, respectively. 
3773 3774 llvm::Function *UnRegFn; 3775 { 3776 FunctionArgList Args; 3777 ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); 3778 Args.push_back(&DummyPtr); 3779 3780 CodeGenFunction CGF(CGM); 3781 // Disable debug info for global (de-)initializer because they are not part 3782 // of some particular construct. 3783 CGF.disableDebugInfo(); 3784 const auto &FI = 3785 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3786 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 3787 std::string UnregName = getName({"omp_offloading", "descriptor_unreg"}); 3788 UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI); 3789 CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args); 3790 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), 3791 Desc); 3792 CGF.FinishFunction(); 3793 } 3794 llvm::Function *RegFn; 3795 { 3796 CodeGenFunction CGF(CGM); 3797 // Disable debug info for global (de-)initializer because they are not part 3798 // of some particular construct. 3799 CGF.disableDebugInfo(); 3800 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 3801 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 3802 3803 // Encode offload target triples into the registration function name. It 3804 // will serve as a comdat key for the registration/unregistration code for 3805 // this particular combination of offloading targets. 
3806 SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U); 3807 RegFnNameParts[0] = "omp_offloading"; 3808 RegFnNameParts[1] = "descriptor_reg"; 3809 llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2), 3810 [](const llvm::Triple &T) -> const std::string& { 3811 return T.getTriple(); 3812 }); 3813 llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end()); 3814 std::string Descriptor = getName(RegFnNameParts); 3815 RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI); 3816 CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList()); 3817 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); 3818 // Create a variable to drive the registration and unregistration of the 3819 // descriptor, so we can reuse the logic that emits Ctors and Dtors. 3820 ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), 3821 SourceLocation(), nullptr, C.CharTy, 3822 ImplicitParamDecl::Other); 3823 CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); 3824 CGF.FinishFunction(); 3825 } 3826 if (CGM.supportsCOMDAT()) { 3827 // It is sufficient to call registration function only once, so create a 3828 // COMDAT group for registration/unregistration functions and associated 3829 // data. That would reduce startup time and code size. Registration 3830 // function serves as a COMDAT group key. 
3831 llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName()); 3832 RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); 3833 RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); 3834 RegFn->setComdat(ComdatKey); 3835 UnRegFn->setComdat(ComdatKey); 3836 DeviceImages->setComdat(ComdatKey); 3837 Desc->setComdat(ComdatKey); 3838 } 3839 return RegFn; 3840 } 3841 3842 void CGOpenMPRuntime::createOffloadEntry( 3843 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3844 llvm::GlobalValue::LinkageTypes Linkage) { 3845 StringRef Name = Addr->getName(); 3846 llvm::Module &M = CGM.getModule(); 3847 llvm::LLVMContext &C = M.getContext(); 3848 3849 // Create constant string with the name. 3850 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3851 3852 std::string StringName = getName({"omp_offloading", "entry_name"}); 3853 auto *Str = new llvm::GlobalVariable( 3854 M, StrPtrInit->getType(), /*isConstant=*/true, 3855 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3856 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3857 3858 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 3859 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 3860 llvm::ConstantInt::get(CGM.SizeTy, Size), 3861 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3862 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3863 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3864 llvm::GlobalVariable *Entry = createConstantGlobalStruct( 3865 CGM, getTgtOffloadEntryQTy(), Data, Twine(EntryName).concat(Name), 3866 llvm::GlobalValue::WeakAnyLinkage); 3867 3868 // The entry has to be created in the section the linker expects it to be. 
std::string Section = getName({"omp_offloading", "entries"});
  Entry->setSection(Section);
}

/// Emit the "omp_offload.info" named metadata node describing every registered
/// offload entry, then create an offload entry for each of them in creation
/// order.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we do not have entries, we don't need to do anything.
  if (OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // One slot per registered entry; slots are filled by the emitters below,
  // indexed by the entry's creation order.
  SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
      OrderedEntries(OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        // NOTE(review): the index is not range-checked here; this relies on
        // every order value being smaller than the number of entries.
        OrderedEntries[E.getOrder()] = &E;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Walk the entries in creation order and create the offload entry for each
  // one; invalid entries are diagnosed and skipped.
  for (const auto *E : OrderedEntries) {
    assert(E && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                E)) {
      if (!CE->getID() || !CE->getAddress()) {
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(DiagID);
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE =
                   dyn_cast<OffloadEntriesInfoManagerTy::
                                OffloadEntryInfoDeviceGlobalVar>(E)) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // The assertion requires the address to be unset when compiling for
        // the device and set otherwise.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        // No entry is created for 'link' variables in device mode.
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        // Still diagnosed explicitly, since the assertion above is compiled
        // out in builds without assertions.
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // The host module is parsed into a context local to this function.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Nothing to load if the host module carries no offload info.
  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers reading operand Idx of the current node as an integer/string.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operands follow the layout
    // written by createOffloadEntriesAndInfoMetadata().
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

/// Build and cache the kmp_routine_entry_t pointer type (both the QualType and
/// the converted LLVM type) if it has not been built yet.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

/// Return the type of the implicit record __tgt_offload_entry, building and
/// caching it on first use.
QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void      *addr;       // Pointer to the offload entry info.
  //                          // (function or global)
  //   char      *name;       // Name of the function or global.
  //   size_t     size;       // Size of the entry info (0 if it is a function).
  //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
  //   int32_t    reserved;   // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    // The record is marked packed after its definition has been completed.
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

/// Return the type of the implicit record __tgt_device_image, building and
/// caching it on first use.
QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
  // These are the types we need to build:
  // struct __tgt_device_image{
  // void   *ImageStart;       // Pointer to the target code start.
  // void   *ImageEnd;         // Pointer to the target code end.
  // // We also add the host entries to the device image, as it may be useful
  // // for the target runtime to have access to that information.
  // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
  //                                       // the entries.
  // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
  //                                       // entries (non inclusive).
  // };
  if (TgtDeviceImageQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtDeviceImageQTy = C.getRecordType(RD);
  }
  return TgtDeviceImageQTy;
}

/// Return the type of the implicit record __tgt_bin_desc, building and caching
/// it on first use.
QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
  // struct __tgt_bin_desc{
  //   int32_t              NumDevices;      // Number of devices supported.
  //   __tgt_device_image   *DeviceImages;   // Arrays of device images
  //                                         // (one per device).
  //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
  //                                         // entries.
  //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
  //                                         // entries (non inclusive).
  // };
  if (TgtBinaryDescriptorQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
    RD->startDefinition();
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtBinaryDescriptorQTy = C.getRecordType(RD);
  }
  return TgtBinaryDescriptorQTy;
}

namespace {
/// Bundles the declarations that describe one privatized variable of a task.
struct PrivateHelpersTy {
  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
                   const VarDecl *PrivateElemInit)
      : Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  // The variable named in the clause.
  const VarDecl *Original;
  // The private copy; its initializer drives the emitted initialization.
  const VarDecl *PrivateCopy;
  // Helper variable used when initializing from the original; may be null.
  const VarDecl *PrivateElemInit;
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

/// Build the implicit record holding one field per entry of \p Privates.
/// \return the record declaration, or nullptr when \p Privates is empty.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      // Propagate the 'aligned' attributes of the original variable to the
      // field.
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

/// Build the implicit record kmp_task_t; the taskloop-specific fields are
/// added only when \p Kind is a taskloop directive.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  // kmp_cmplrdata_t is a union of a kmp_int32 and a routine entry pointer.
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

/// Build the implicit record kmp_task_t_with_privates: the task descriptor
/// followed by the privates record, which is omitted when \p Privates is
/// empty.
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Value *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Value *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Signature of the proxy: (kmp_int32 gtid, kmp_task_t_with_privates
  // *restrict tt) returning kmp_int32.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.reductions,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is *tt; Base is its first field (the kmp_task_t part).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer();

  // The shareds pointer is loaded and cast to the expected pointer type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates record is the optional second field; pass null when the
  // record has no such field.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to tasks and taskloops; the last one is tt itself cast
  // to void *.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally pass lb, ub, st, liter and reductions, loaded from
  // the task descriptor.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emit an internal function, with the same parameter list as the task entry
/// proxy, that pushes a destroy cleanup for every field of the privates record
/// whose type requires destruction.
/// NOTE(review): the second field of the record is dereferenced
/// unconditionally, so the record is presumably expected to contain a privates
/// field - confirm against the caller.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Step to the second field (the privates record) and rebase on it.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates.
*noalias privs, <ty1> 4449 /// **noalias priv1,..., <tyn> **noalias privn) { 4450 /// *priv1 = &.privates.priv1; 4451 /// ...; 4452 /// *privn = &.privates.privn; 4453 /// } 4454 /// \endcode 4455 static llvm::Value * 4456 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 4457 ArrayRef<const Expr *> PrivateVars, 4458 ArrayRef<const Expr *> FirstprivateVars, 4459 ArrayRef<const Expr *> LastprivateVars, 4460 QualType PrivatesQTy, 4461 ArrayRef<PrivateDataTy> Privates) { 4462 ASTContext &C = CGM.getContext(); 4463 FunctionArgList Args; 4464 ImplicitParamDecl TaskPrivatesArg( 4465 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4466 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 4467 ImplicitParamDecl::Other); 4468 Args.push_back(&TaskPrivatesArg); 4469 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 4470 unsigned Counter = 1; 4471 for (const Expr *E : PrivateVars) { 4472 Args.push_back(ImplicitParamDecl::Create( 4473 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4474 C.getPointerType(C.getPointerType(E->getType())) 4475 .withConst() 4476 .withRestrict(), 4477 ImplicitParamDecl::Other)); 4478 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4479 PrivateVarsPos[VD] = Counter; 4480 ++Counter; 4481 } 4482 for (const Expr *E : FirstprivateVars) { 4483 Args.push_back(ImplicitParamDecl::Create( 4484 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4485 C.getPointerType(C.getPointerType(E->getType())) 4486 .withConst() 4487 .withRestrict(), 4488 ImplicitParamDecl::Other)); 4489 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4490 PrivateVarsPos[VD] = Counter; 4491 ++Counter; 4492 } 4493 for (const Expr *E : LastprivateVars) { 4494 Args.push_back(ImplicitParamDecl::Create( 4495 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4496 C.getPointerType(C.getPointerType(E->getType())) 4497 .withConst() 4498 .withRestrict(), 4499 ImplicitParamDecl::Other)); 4500 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4501 
PrivateVarsPos[VD] = Counter; 4502 ++Counter; 4503 } 4504 const auto &TaskPrivatesMapFnInfo = 4505 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4506 llvm::FunctionType *TaskPrivatesMapTy = 4507 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 4508 std::string Name = 4509 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 4510 auto *TaskPrivatesMap = llvm::Function::Create( 4511 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 4512 &CGM.getModule()); 4513 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 4514 TaskPrivatesMapFnInfo); 4515 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 4516 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 4517 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 4518 CodeGenFunction CGF(CGM); 4519 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 4520 TaskPrivatesMapFnInfo, Args, Loc, Loc); 4521 4522 // *privi = &.privates.privi; 4523 LValue Base = CGF.EmitLoadOfPointerLValue( 4524 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 4525 TaskPrivatesArg.getType()->castAs<PointerType>()); 4526 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 4527 Counter = 0; 4528 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 4529 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 4530 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4531 LValue RefLVal = 4532 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4533 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4534 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 4535 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 4536 ++Counter; 4537 } 4538 CGF.FinishFunction(); 4539 return TaskPrivatesMap; 4540 } 4541 4542 static bool stable_sort_comparator(const PrivateDataTy P1, 4543 const PrivateDataTy P2) { 4544 return P1.first > P2.first; 4545 } 4546 4547 /// Emit initialization for private variables in 
/// task-based directives.
/// Walks \p Privates in step with the fields of the privates record (the
/// second field of \p TDBase) and emits the initializer of each private copy
/// into its field. When \p ForDup is true, only copies initialized by a
/// non-trivial constructor call are emitted.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // FI first designates the privates field of the task record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record itself;
  // it advances once per entry of Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit means the copy is initialized from the
      // original (shared) variable.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = OriginalVD->getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          // Address the original through the shareds record, then rebuild the
          // lvalue with the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array case: map Elem to the address of the original and emit
          // the initializer of the private copy.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No element initializer: emit the private copy's own initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
4653 static bool checkInitIsRequired(CodeGenFunction &CGF, 4654 ArrayRef<PrivateDataTy> Privates) { 4655 bool InitRequired = false; 4656 for (const PrivateDataTy &Pair : Privates) { 4657 const VarDecl *VD = Pair.second.PrivateCopy; 4658 const Expr *Init = VD->getAnyInitializer(); 4659 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4660 !CGF.isTrivialInitializer(Init)); 4661 if (InitRequired) 4662 break; 4663 } 4664 return InitRequired; 4665 } 4666 4667 4668 /// Emit task_dup function (for initialization of 4669 /// private/firstprivate/lastprivate vars and last_iter flag) 4670 /// \code 4671 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4672 /// lastpriv) { 4673 /// // setup lastprivate flag 4674 /// task_dst->last = lastpriv; 4675 /// // could be constructor calls here... 4676 /// } 4677 /// \endcode 4678 static llvm::Value * 4679 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4680 const OMPExecutableDirective &D, 4681 QualType KmpTaskTWithPrivatesPtrQTy, 4682 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4683 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4684 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4685 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4686 ASTContext &C = CGM.getContext(); 4687 FunctionArgList Args; 4688 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4689 KmpTaskTWithPrivatesPtrQTy, 4690 ImplicitParamDecl::Other); 4691 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4692 KmpTaskTWithPrivatesPtrQTy, 4693 ImplicitParamDecl::Other); 4694 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4695 ImplicitParamDecl::Other); 4696 Args.push_back(&DstArg); 4697 Args.push_back(&SrcArg); 4698 Args.push_back(&LastprivArg); 4699 const auto &TaskDupFnInfo = 4700 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4701 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 
4702 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4703 auto *TaskDup = llvm::Function::Create( 4704 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4705 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4706 TaskDup->setDoesNotRecurse(); 4707 CodeGenFunction CGF(CGM); 4708 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4709 Loc); 4710 4711 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4712 CGF.GetAddrOfLocalVar(&DstArg), 4713 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4714 // task_dst->liter = lastpriv; 4715 if (WithLastIter) { 4716 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4717 LValue Base = CGF.EmitLValueForField( 4718 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4719 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4720 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4721 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4722 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4723 } 4724 4725 // Emit initial values for private copies (if any). 4726 assert(!Privates.empty()); 4727 Address KmpTaskSharedsPtr = Address::invalid(); 4728 if (!Data.FirstprivateVars.empty()) { 4729 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4730 CGF.GetAddrOfLocalVar(&SrcArg), 4731 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4732 LValue Base = CGF.EmitLValueForField( 4733 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4734 KmpTaskSharedsPtr = Address( 4735 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4736 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4737 KmpTaskTShareds)), 4738 Loc), 4739 CGF.getNaturalTypeAlignment(SharedsTy)); 4740 } 4741 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4742 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4743 CGF.FinishFunction(); 4744 return TaskDup; 4745 } 4746 4747 /// Checks if destructor function is required to be generated. 
/// \return true if cleanups are required, false otherwise.
///
/// The field at index 1 of \p KmpTaskTWithPrivatesQTyRD is taken as the record
/// holding the privates; cleanups are required as soon as one of its fields
/// has a destructed type.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
  bool NeedsCleanup = false;
  // Second field of the task-with-privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
  for (const FieldDecl *FD : PrivateRD->fields()) {
    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
    if (NeedsCleanup)
      break;
  }
  return NeedsCleanup;
}

/// Emit the allocation and initialization of a task object.
///
/// Collects the private, firstprivate and lastprivate variables from \p Data,
/// builds the task record type extended with those privates, emits the proxy
/// task entry, calls __kmpc_omp_task_alloc, copies the shareds from
/// \p Shareds into the new task, initializes the private copies and, when
/// needed, stores the destructors function and the priority into the task.
///
/// \param CGF Function in which the code is emitted.
/// \param Loc Source location used for the emitted code.
/// \param D Directive the task is created for (taskloop, task or target).
/// \param TaskFunction Outlined task function; the type of its parameter at
/// index 3 is used as the type of the privates mapping function.
/// \param SharedsTy Type of the record with shared variables.
/// \param Shareds Address of the record with shared variables to copy from.
/// \param Data Additional task data (privates, flags, priority, ...).
/// \return Structure with the allocated task, the proxy task entry, the task
/// pointer cast to the task-with-privates type, the lvalue of the embedded
/// kmp_task_t record, its record declaration and, if one was generated, the
/// task_dup function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Value *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // Each *Vars list is walked in lock-step with its matching *Copies list.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  // Firstprivates additionally carry the variable used for element-wise
  // initialization.
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator);
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives and all other directives use separately cached record
  // types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates mapping function is passed as the argument at index 3 of the
  // outlined task function, so its type is taken from there.
  llvm::Type *TaskPrivatesMapTy =
      std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null mapping function.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Value *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  // Flags known at compile time are accumulated in Flags; the 'final' flag
  // may depend on a runtime value and is merged in below.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // If Data.Final carries a value, select the flag at runtime; otherwise use
  // its integer part as a compile-time decision.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  // View the returned task as a pointer to the task-with-privates record.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  // First field of the record is the kmp_task_t part.
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task_dup function is generated only for taskloops that have
    // lastprivates or privates requiring non-trivial constructor calls.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  // The union declaration obtained from the Data1 field is reused to address
  // the Data2 field.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

/// Emit the code that creates and launches a task.
///
/// The task is allocated and initialized by emitTaskInit. If \p Data lists
/// dependences, an array of kmp_depend_info records is filled in first. The
/// task is then passed to __kmpc_omp_task or __kmpc_omp_task_with_deps. When
/// \p IfCond is given, both code paths are handed to emitOMPIfClause: the
/// 'then' path launches the task as above, the 'else' path waits for the
/// dependences (if any) and calls the task entry directly.
///
/// \param CGF Function in which the code is emitted.
/// \param Loc Source location used for the emitted code.
/// \param D Directive the task is created for.
/// \param TaskFunction Outlined task function.
/// \param SharedsTy Type of the record with shared variables.
/// \param Shareds Address of the record with shared variables.
/// \param IfCond Condition of the 'if' clause, or null if there is none.
/// \param Data Additional task data (dependences, tiedness, ...).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Value *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Value *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  ASTContext &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
    // Indices of the fields of the kmp_depend_info record built below.
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    // The flags field is an unsigned integer as wide as bool.
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Build the kmp_depend_info record once and cache it in KmpDependInfoTy.
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned I = 0; I < NumDependencies; ++I) {
      const Expr *E = Data.Dependences[I].second;
      LValue Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (const auto *ASE =
              dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // For an array section the length is the distance in bytes between
        // the section start and the address one element past the element
        // emitted with LowerBound=false.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = CGF.getTypeSize(Ty);
      }
      LValue Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, I, DependencySize),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[I].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      // The remaining kinds are not expected for task dependences.
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      LValue FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // From here on the array is referenced as a void pointer to its first
    // element.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
        CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Filled in only when there are dependences; the no-alias dependence list
  // is always passed as empty (count 0, null pointer).
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Code path that hands the task over to the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
                        &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    // For untied tasks store 0 into the part_id field.
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // Arguments for __kmpc_omp_wait_deps; filled in only when there are
  // dependences.
  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Code path that calls the task entry directly instead of handing the task
  // to __kmpc_omp_task*.
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // NOTE(review): CommonActionTy presumably emits the first call on entry to
    // the region and the second one on exit - confirm against its definition.
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  // Without an 'if' clause only the 'then' path is emitted.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

/// Emit the code that creates a taskloop task and passes it to
/// __kmpc_taskloop.
///
/// The task is allocated and initialized by emitTaskInit. The initializers of
/// the directive's lower bound, upper bound and stride variables are then
/// stored into the corresponding fields of the task, the reductions field is
/// set to \p Data.Reductions or null-initialized, and __kmpc_taskloop is
/// called.
///
/// \param CGF Function in which the code is emitted.
/// \param Loc Source location used for the emitted code.
/// \param D Loop directive the task is created for.
/// \param TaskFunction Outlined task function.
/// \param SharedsTy Type of the record with shared variables.
/// \param Shareds Address of the record with shared variables.
/// \param IfCond Condition of the 'if' clause, or null if there is none; it
/// is evaluated and passed to the runtime as the if_val argument (1 when
/// absent).
/// \param Data Additional task data (schedule, reductions, ...).
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Value *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound, upper bound and stride fields of the task
  // from the initializers of the directive's helper variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // Values of the 'sched' argument: no schedule clause, grainsize, num_tasks.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getNullValue(
          CGF.IntTy), // Always 0 because taskgroup emitted by the compiler
      // Schedule kind: chosen by whether Data.Schedule carries a value and by
      // its integer flag.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      // Schedule value widened to 64 bits, or 0 if there is none.
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      // task_dup function, or null if none was generated.
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr Passed through unchanged to \p RedOpGen.
/// \param EExpr Passed through unchanged to \p RedOpGen.
/// \param UpExpr Passed through unchanged to \p RedOpGen.
5247 static void EmitOMPAggregateReduction( 5248 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5249 const VarDecl *RHSVar, 5250 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5251 const Expr *, const Expr *)> &RedOpGen, 5252 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5253 const Expr *UpExpr = nullptr) { 5254 // Perform element-by-element initialization. 5255 QualType ElementTy; 5256 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5257 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5258 5259 // Drill down to the base element type on both arrays. 5260 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5261 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5262 5263 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5264 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5265 // Cast from pointer to array type to pointer to single element. 5266 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5267 // The basic structure here is a while-do loop. 5268 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5269 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5270 llvm::Value *IsEmpty = 5271 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5272 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5273 5274 // Enter the loop body, making that address the current address. 
5275 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5276 CGF.EmitBlock(BodyBB); 5277 5278 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5279 5280 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5281 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5282 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5283 Address RHSElementCurrent = 5284 Address(RHSElementPHI, 5285 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5286 5287 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5288 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5289 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5290 Address LHSElementCurrent = 5291 Address(LHSElementPHI, 5292 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5293 5294 // Emit copy. 5295 CodeGenFunction::OMPPrivateScope Scope(CGF); 5296 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5297 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5298 Scope.Privatize(); 5299 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5300 Scope.ForceCleanup(); 5301 5302 // Shift the address forward by one element. 5303 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5304 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5305 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5306 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5307 // Check whether we've reached the end. 5308 llvm::Value *Done = 5309 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5310 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5311 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5312 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5313 5314 // Done. 5315 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5316 } 5317 5318 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5319 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5320 /// UDR combiner function. 5321 static void emitReductionCombiner(CodeGenFunction &CGF, 5322 const Expr *ReductionOp) { 5323 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5324 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5325 if (const auto *DRE = 5326 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5327 if (const auto *DRD = 5328 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5329 std::pair<llvm::Function *, llvm::Function *> Reduction = 5330 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5331 RValue Func = RValue::get(Reduction.first); 5332 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5333 CGF.EmitIgnoredExpr(ReductionOp); 5334 return; 5335 } 5336 CGF.EmitIgnoredExpr(ReductionOp); 5337 } 5338 5339 llvm::Value *CGOpenMPRuntime::emitReductionFunction( 5340 CodeGenModule &CGM, SourceLocation Loc, llvm::Type *ArgsType, 5341 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, 5342 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) { 5343 ASTContext &C = CGM.getContext(); 5344 5345 // void reduction_func(void *LHSArg, void *RHSArg); 5346 FunctionArgList Args; 5347 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5348 ImplicitParamDecl::Other); 5349 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5350 ImplicitParamDecl::Other); 5351 Args.push_back(&LHSArg); 5352 Args.push_back(&RHSArg); 5353 const auto &CGFI = 5354 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5355 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5356 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5357 llvm::GlobalValue::InternalLinkage, Name, 5358 &CGM.getModule()); 5359 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5360 
Fn->setDoesNotRecurse(); 5361 CodeGenFunction CGF(CGM); 5362 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5363 5364 // Dst = (void*[n])(LHSArg); 5365 // Src = (void*[n])(RHSArg); 5366 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5367 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5368 ArgsType), CGF.getPointerAlign()); 5369 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5370 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5371 ArgsType), CGF.getPointerAlign()); 5372 5373 // ... 5374 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5375 // ... 5376 CodeGenFunction::OMPPrivateScope Scope(CGF); 5377 auto IPriv = Privates.begin(); 5378 unsigned Idx = 0; 5379 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5380 const auto *RHSVar = 5381 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5382 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5383 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5384 }); 5385 const auto *LHSVar = 5386 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5387 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5388 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5389 }); 5390 QualType PrivTy = (*IPriv)->getType(); 5391 if (PrivTy->isVariablyModifiedType()) { 5392 // Get array size and emit VLA type. 
5393 ++Idx; 5394 Address Elem = 5395 CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize()); 5396 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5397 const VariableArrayType *VLA = 5398 CGF.getContext().getAsVariableArrayType(PrivTy); 5399 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5400 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5401 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5402 CGF.EmitVariablyModifiedType(PrivTy); 5403 } 5404 } 5405 Scope.Privatize(); 5406 IPriv = Privates.begin(); 5407 auto ILHS = LHSExprs.begin(); 5408 auto IRHS = RHSExprs.begin(); 5409 for (const Expr *E : ReductionOps) { 5410 if ((*IPriv)->getType()->isArrayType()) { 5411 // Emit reduction for array section. 5412 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5413 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5414 EmitOMPAggregateReduction( 5415 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5416 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5417 emitReductionCombiner(CGF, E); 5418 }); 5419 } else { 5420 // Emit reduction for array subscript or single variable. 5421 emitReductionCombiner(CGF, E); 5422 } 5423 ++IPriv; 5424 ++ILHS; 5425 ++IRHS; 5426 } 5427 Scope.ForceCleanup(); 5428 CGF.FinishFunction(); 5429 return Fn; 5430 } 5431 5432 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5433 const Expr *ReductionOp, 5434 const Expr *PrivateRef, 5435 const DeclRefExpr *LHS, 5436 const DeclRefExpr *RHS) { 5437 if (PrivateRef->getType()->isArrayType()) { 5438 // Emit reduction for array section. 
5439 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5440 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5441 EmitOMPAggregateReduction( 5442 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5443 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5444 emitReductionCombiner(CGF, ReductionOp); 5445 }); 5446 } else { 5447 // Emit reduction for array subscript or single variable. 5448 emitReductionCombiner(CGF, ReductionOp); 5449 } 5450 } 5451 5452 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5453 ArrayRef<const Expr *> Privates, 5454 ArrayRef<const Expr *> LHSExprs, 5455 ArrayRef<const Expr *> RHSExprs, 5456 ArrayRef<const Expr *> ReductionOps, 5457 ReductionOptionsTy Options) { 5458 if (!CGF.HaveInsertPoint()) 5459 return; 5460 5461 bool WithNowait = Options.WithNowait; 5462 bool SimpleReduction = Options.SimpleReduction; 5463 5464 // Next code should be emitted for reduction: 5465 // 5466 // static kmp_critical_name lock = { 0 }; 5467 // 5468 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5469 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5470 // ... 5471 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5472 // *(Type<n>-1*)rhs[<n>-1]); 5473 // } 5474 // 5475 // ... 5476 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5477 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5478 // RedList, reduce_func, &<lock>)) { 5479 // case 1: 5480 // ... 5481 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5482 // ... 5483 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5484 // break; 5485 // case 2: 5486 // ... 5487 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5488 // ... 5489 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5490 // break; 5491 // default:; 5492 // } 5493 // 5494 // if SimpleReduction is true, only the next code is generated: 5495 // ... 
5496 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5497 // ... 5498 5499 ASTContext &C = CGM.getContext(); 5500 5501 if (SimpleReduction) { 5502 CodeGenFunction::RunCleanupsScope Scope(CGF); 5503 auto IPriv = Privates.begin(); 5504 auto ILHS = LHSExprs.begin(); 5505 auto IRHS = RHSExprs.begin(); 5506 for (const Expr *E : ReductionOps) { 5507 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5508 cast<DeclRefExpr>(*IRHS)); 5509 ++IPriv; 5510 ++ILHS; 5511 ++IRHS; 5512 } 5513 return; 5514 } 5515 5516 // 1. Build a list of reduction variables. 5517 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5518 auto Size = RHSExprs.size(); 5519 for (const Expr *E : Privates) { 5520 if (E->getType()->isVariablyModifiedType()) 5521 // Reserve place for array size. 5522 ++Size; 5523 } 5524 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5525 QualType ReductionArrayTy = 5526 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 5527 /*IndexTypeQuals=*/0); 5528 Address ReductionList = 5529 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5530 auto IPriv = Privates.begin(); 5531 unsigned Idx = 0; 5532 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5533 Address Elem = 5534 CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize()); 5535 CGF.Builder.CreateStore( 5536 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5537 CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), 5538 Elem); 5539 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5540 // Store array size. 
5541 ++Idx; 5542 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, 5543 CGF.getPointerSize()); 5544 llvm::Value *Size = CGF.Builder.CreateIntCast( 5545 CGF.getVLASize( 5546 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5547 .NumElts, 5548 CGF.SizeTy, /*isSigned=*/false); 5549 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5550 Elem); 5551 } 5552 } 5553 5554 // 2. Emit reduce_func(). 5555 llvm::Value *ReductionFn = emitReductionFunction( 5556 CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), 5557 Privates, LHSExprs, RHSExprs, ReductionOps); 5558 5559 // 3. Create static kmp_critical_name lock = { 0 }; 5560 std::string Name = getName({"reduction"}); 5561 llvm::Value *Lock = getCriticalRegionLock(Name); 5562 5563 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5564 // RedList, reduce_func, &<lock>); 5565 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5566 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5567 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5568 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5569 ReductionList.getPointer(), CGF.VoidPtrTy); 5570 llvm::Value *Args[] = { 5571 IdentTLoc, // ident_t *<loc> 5572 ThreadId, // i32 <gtid> 5573 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5574 ReductionArrayTySize, // size_type sizeof(RedList) 5575 RL, // void *RedList 5576 ReductionFn, // void (*) (void *, void *) <reduce_func> 5577 Lock // kmp_critical_name *&<lock> 5578 }; 5579 llvm::Value *Res = CGF.EmitRuntimeCall( 5580 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 5581 : OMPRTL__kmpc_reduce), 5582 Args); 5583 5584 // 5. Build switch(res) 5585 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5586 llvm::SwitchInst *SwInst = 5587 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5588 5589 // 6. Build case 1: 5590 // ... 
5591 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5592 // ... 5593 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5594 // break; 5595 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5596 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5597 CGF.EmitBlock(Case1BB); 5598 5599 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5600 llvm::Value *EndArgs[] = { 5601 IdentTLoc, // ident_t *<loc> 5602 ThreadId, // i32 <gtid> 5603 Lock // kmp_critical_name *&<lock> 5604 }; 5605 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5606 CodeGenFunction &CGF, PrePostActionTy &Action) { 5607 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5608 auto IPriv = Privates.begin(); 5609 auto ILHS = LHSExprs.begin(); 5610 auto IRHS = RHSExprs.begin(); 5611 for (const Expr *E : ReductionOps) { 5612 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5613 cast<DeclRefExpr>(*IRHS)); 5614 ++IPriv; 5615 ++ILHS; 5616 ++IRHS; 5617 } 5618 }; 5619 RegionCodeGenTy RCG(CodeGen); 5620 CommonActionTy Action( 5621 nullptr, llvm::None, 5622 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait 5623 : OMPRTL__kmpc_end_reduce), 5624 EndArgs); 5625 RCG.setAction(Action); 5626 RCG(CGF); 5627 5628 CGF.EmitBranch(DefaultBB); 5629 5630 // 7. Build case 2: 5631 // ... 5632 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5633 // ... 
5634 // break; 5635 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5636 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5637 CGF.EmitBlock(Case2BB); 5638 5639 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5640 CodeGenFunction &CGF, PrePostActionTy &Action) { 5641 auto ILHS = LHSExprs.begin(); 5642 auto IRHS = RHSExprs.begin(); 5643 auto IPriv = Privates.begin(); 5644 for (const Expr *E : ReductionOps) { 5645 const Expr *XExpr = nullptr; 5646 const Expr *EExpr = nullptr; 5647 const Expr *UpExpr = nullptr; 5648 BinaryOperatorKind BO = BO_Comma; 5649 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5650 if (BO->getOpcode() == BO_Assign) { 5651 XExpr = BO->getLHS(); 5652 UpExpr = BO->getRHS(); 5653 } 5654 } 5655 // Try to emit update expression as a simple atomic. 5656 const Expr *RHSExpr = UpExpr; 5657 if (RHSExpr) { 5658 // Analyze RHS part of the whole expression. 5659 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5660 RHSExpr->IgnoreParenImpCasts())) { 5661 // If this is a conditional operator, analyze its condition for 5662 // min/max reduction operator. 
5663 RHSExpr = ACO->getCond(); 5664 } 5665 if (const auto *BORHS = 5666 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5667 EExpr = BORHS->getRHS(); 5668 BO = BORHS->getOpcode(); 5669 } 5670 } 5671 if (XExpr) { 5672 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5673 auto &&AtomicRedGen = [BO, VD, 5674 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5675 const Expr *EExpr, const Expr *UpExpr) { 5676 LValue X = CGF.EmitLValue(XExpr); 5677 RValue E; 5678 if (EExpr) 5679 E = CGF.EmitAnyExpr(EExpr); 5680 CGF.EmitOMPAtomicSimpleUpdateExpr( 5681 X, E, BO, /*IsXLHSInRHSPart=*/true, 5682 llvm::AtomicOrdering::Monotonic, Loc, 5683 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5684 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5685 PrivateScope.addPrivate( 5686 VD, [&CGF, VD, XRValue, Loc]() { 5687 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5688 CGF.emitOMPSimpleStore( 5689 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5690 VD->getType().getNonReferenceType(), Loc); 5691 return LHSTemp; 5692 }); 5693 (void)PrivateScope.Privatize(); 5694 return CGF.EmitAnyExpr(UpExpr); 5695 }); 5696 }; 5697 if ((*IPriv)->getType()->isArrayType()) { 5698 // Emit atomic reduction for array section. 5699 const auto *RHSVar = 5700 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5701 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5702 AtomicRedGen, XExpr, EExpr, UpExpr); 5703 } else { 5704 // Emit atomic reduction for array subscript or single variable. 5705 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5706 } 5707 } else { 5708 // Emit as a critical region. 
5709 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5710 const Expr *, const Expr *) { 5711 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5712 std::string Name = RT.getName({"atomic_reduction"}); 5713 RT.emitCriticalRegion( 5714 CGF, Name, 5715 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5716 Action.Enter(CGF); 5717 emitReductionCombiner(CGF, E); 5718 }, 5719 Loc); 5720 }; 5721 if ((*IPriv)->getType()->isArrayType()) { 5722 const auto *LHSVar = 5723 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5724 const auto *RHSVar = 5725 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5726 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5727 CritRedGen); 5728 } else { 5729 CritRedGen(CGF, nullptr, nullptr, nullptr); 5730 } 5731 } 5732 ++ILHS; 5733 ++IRHS; 5734 ++IPriv; 5735 } 5736 }; 5737 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5738 if (!WithNowait) { 5739 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5740 llvm::Value *EndArgs[] = { 5741 IdentTLoc, // ident_t *<loc> 5742 ThreadId, // i32 <gtid> 5743 Lock // kmp_critical_name *&<lock> 5744 }; 5745 CommonActionTy Action(nullptr, llvm::None, 5746 createRuntimeFunction(OMPRTL__kmpc_end_reduce), 5747 EndArgs); 5748 AtomicRCG.setAction(Action); 5749 AtomicRCG(CGF); 5750 } else { 5751 AtomicRCG(CGF); 5752 } 5753 5754 CGF.EmitBranch(DefaultBB); 5755 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5756 } 5757 5758 /// Generates unique name for artificial threadprivate variables. 5759 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5760 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5761 const Expr *Ref) { 5762 SmallString<256> Buffer; 5763 llvm::raw_svector_ostream Out(Buffer); 5764 const clang::DeclRefExpr *DE; 5765 const VarDecl *D = ::getBaseDecl(Ref, DE); 5766 if (!D) 5767 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5768 D = D->getCanonicalDecl(); 5769 std::string Name = CGM.getOpenMPRuntime().getName( 5770 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5771 Out << Prefix << Name << "_" 5772 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5773 return Out.str(); 5774 } 5775 5776 /// Emits reduction initializer function: 5777 /// \code 5778 /// void @.red_init(void* %arg) { 5779 /// %0 = bitcast void* %arg to <type>* 5780 /// store <type> <init>, <type>* %0 5781 /// ret void 5782 /// } 5783 /// \endcode 5784 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5785 SourceLocation Loc, 5786 ReductionCodeGen &RCG, unsigned N) { 5787 ASTContext &C = CGM.getContext(); 5788 FunctionArgList Args; 5789 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5790 ImplicitParamDecl::Other); 5791 Args.emplace_back(&Param); 5792 const auto &FnInfo = 5793 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5794 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5795 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5796 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5797 Name, &CGM.getModule()); 5798 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5799 Fn->setDoesNotRecurse(); 5800 CodeGenFunction CGF(CGM); 5801 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5802 Address PrivateAddr = CGF.EmitLoadOfPointer( 5803 CGF.GetAddrOfLocalVar(&Param), 5804 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5805 llvm::Value *Size = nullptr; 5806 
// If the size of the reduction item is non-constant, load it from global 5807 // threadprivate variable. 5808 if (RCG.getSizes(N).second) { 5809 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5810 CGF, CGM.getContext().getSizeType(), 5811 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5812 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5813 CGM.getContext().getSizeType(), Loc); 5814 } 5815 RCG.emitAggregateType(CGF, N, Size); 5816 LValue SharedLVal; 5817 // If initializer uses initializer from declare reduction construct, emit a 5818 // pointer to the address of the original reduction item (reuired by reduction 5819 // initializer) 5820 if (RCG.usesReductionInitializer(N)) { 5821 Address SharedAddr = 5822 CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5823 CGF, CGM.getContext().VoidPtrTy, 5824 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 5825 SharedAddr = CGF.EmitLoadOfPointer( 5826 SharedAddr, 5827 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 5828 SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 5829 } else { 5830 SharedLVal = CGF.MakeNaturalAlignAddrLValue( 5831 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 5832 CGM.getContext().VoidPtrTy); 5833 } 5834 // Emit the initializer: 5835 // %0 = bitcast void* %arg to <type>* 5836 // store <type> <init>, <type>* %0 5837 RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal, 5838 [](CodeGenFunction &) { return false; }); 5839 CGF.FinishFunction(); 5840 return Fn; 5841 } 5842 5843 /// Emits reduction combiner function: 5844 /// \code 5845 /// void @.red_comb(void* %arg0, void* %arg1) { 5846 /// %lhs = bitcast void* %arg0 to <type>* 5847 /// %rhs = bitcast void* %arg1 to <type>* 5848 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 5849 /// store <type> %2, <type>* %lhs 5850 /// ret void 5851 /// } 5852 /// \endcode 5853 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 5854 
SourceLocation Loc, 5855 ReductionCodeGen &RCG, unsigned N, 5856 const Expr *ReductionOp, 5857 const Expr *LHS, const Expr *RHS, 5858 const Expr *PrivateRef) { 5859 ASTContext &C = CGM.getContext(); 5860 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 5861 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 5862 FunctionArgList Args; 5863 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 5864 C.VoidPtrTy, ImplicitParamDecl::Other); 5865 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5866 ImplicitParamDecl::Other); 5867 Args.emplace_back(&ParamInOut); 5868 Args.emplace_back(&ParamIn); 5869 const auto &FnInfo = 5870 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5871 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5872 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 5873 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5874 Name, &CGM.getModule()); 5875 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5876 Fn->setDoesNotRecurse(); 5877 CodeGenFunction CGF(CGM); 5878 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5879 llvm::Value *Size = nullptr; 5880 // If the size of the reduction item is non-constant, load it from global 5881 // threadprivate variable. 5882 if (RCG.getSizes(N).second) { 5883 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5884 CGF, CGM.getContext().getSizeType(), 5885 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5886 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5887 CGM.getContext().getSizeType(), Loc); 5888 } 5889 RCG.emitAggregateType(CGF, N, Size); 5890 // Remap lhs and rhs variables to the addresses of the function arguments. 
5891 // %lhs = bitcast void* %arg0 to <type>* 5892 // %rhs = bitcast void* %arg1 to <type>* 5893 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5894 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 5895 // Pull out the pointer to the variable. 5896 Address PtrAddr = CGF.EmitLoadOfPointer( 5897 CGF.GetAddrOfLocalVar(&ParamInOut), 5898 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5899 return CGF.Builder.CreateElementBitCast( 5900 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 5901 }); 5902 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 5903 // Pull out the pointer to the variable. 5904 Address PtrAddr = CGF.EmitLoadOfPointer( 5905 CGF.GetAddrOfLocalVar(&ParamIn), 5906 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5907 return CGF.Builder.CreateElementBitCast( 5908 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 5909 }); 5910 PrivateScope.Privatize(); 5911 // Emit the combiner body: 5912 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 5913 // store <type> %2, <type>* %lhs 5914 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 5915 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 5916 cast<DeclRefExpr>(RHS)); 5917 CGF.FinishFunction(); 5918 return Fn; 5919 } 5920 5921 /// Emits reduction finalizer function: 5922 /// \code 5923 /// void @.red_fini(void* %arg) { 5924 /// %0 = bitcast void* %arg to <type>* 5925 /// <destroy>(<type>* %0) 5926 /// ret void 5927 /// } 5928 /// \endcode 5929 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 5930 SourceLocation Loc, 5931 ReductionCodeGen &RCG, unsigned N) { 5932 if (!RCG.needCleanups(N)) 5933 return nullptr; 5934 ASTContext &C = CGM.getContext(); 5935 FunctionArgList Args; 5936 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5937 ImplicitParamDecl::Other); 5938 Args.emplace_back(&Param); 5939 const auto &FnInfo = 5940 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5941 llvm::FunctionType *FnTy = 
CGM.getTypes().GetFunctionType(FnInfo); 5942 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 5943 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5944 Name, &CGM.getModule()); 5945 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5946 Fn->setDoesNotRecurse(); 5947 CodeGenFunction CGF(CGM); 5948 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5949 Address PrivateAddr = CGF.EmitLoadOfPointer( 5950 CGF.GetAddrOfLocalVar(&Param), 5951 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5952 llvm::Value *Size = nullptr; 5953 // If the size of the reduction item is non-constant, load it from global 5954 // threadprivate variable. 5955 if (RCG.getSizes(N).second) { 5956 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5957 CGF, CGM.getContext().getSizeType(), 5958 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5959 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5960 CGM.getContext().getSizeType(), Loc); 5961 } 5962 RCG.emitAggregateType(CGF, N, Size); 5963 // Emit the finalizer body: 5964 // <destroy>(<type>* %0) 5965 RCG.emitCleanups(CGF, N, PrivateAddr); 5966 CGF.FinishFunction(); 5967 return Fn; 5968 } 5969 5970 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 5971 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 5972 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 5973 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 5974 return nullptr; 5975 5976 // Build typedef struct: 5977 // kmp_task_red_input { 5978 // void *reduce_shar; // shared reduction item 5979 // size_t reduce_size; // size of data item 5980 // void *reduce_init; // data initialization routine 5981 // void *reduce_fini; // data finalization routine 5982 // void *reduce_comb; // data combiner routine 5983 // kmp_task_red_flags_t flags; // flags for additional info from compiler 5984 // } kmp_task_red_input_t; 5985 
ASTContext &C = CGM.getContext(); 5986 RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t"); 5987 RD->startDefinition(); 5988 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5989 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 5990 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5991 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5992 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5993 const FieldDecl *FlagsFD = addFieldToRecordDecl( 5994 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 5995 RD->completeDefinition(); 5996 QualType RDType = C.getRecordType(RD); 5997 unsigned Size = Data.ReductionVars.size(); 5998 llvm::APInt ArraySize(/*numBits=*/64, Size); 5999 QualType ArrayRDType = C.getConstantArrayType( 6000 RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); 6001 // kmp_task_red_input_t .rd_input.[Size]; 6002 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6003 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, 6004 Data.ReductionOps); 6005 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6006 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6007 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6008 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6009 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6010 TaskRedInput.getPointer(), Idxs, 6011 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6012 ".rd_input.gep."); 6013 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6014 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6015 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6016 RCG.emitSharedLValue(CGF, Cnt); 6017 llvm::Value *CastedShared = 6018 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer()); 6019 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6020 RCG.emitAggregateType(CGF, Cnt); 6021 llvm::Value *SizeValInChars; 6022 llvm::Value 
*SizeVal; 6023 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6024 // We use delayed creation/initialization for VLAs, array sections and 6025 // custom reduction initializations. It is required because runtime does not 6026 // provide the way to pass the sizes of VLAs/array sections to 6027 // initializer/combiner/finalizer functions and does not pass the pointer to 6028 // original reduction item to the initializer. Instead threadprivate global 6029 // variables are used to store these values and use them in the functions. 6030 bool DelayedCreation = !!SizeVal; 6031 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6032 /*isSigned=*/false); 6033 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6034 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6035 // ElemLVal.reduce_init = init; 6036 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6037 llvm::Value *InitAddr = 6038 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6039 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6040 DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt); 6041 // ElemLVal.reduce_fini = fini; 6042 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6043 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6044 llvm::Value *FiniAddr = Fini 6045 ? 
CGF.EmitCastToVoidPtr(Fini) 6046 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 6047 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 6048 // ElemLVal.reduce_comb = comb; 6049 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 6050 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 6051 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 6052 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 6053 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 6054 // ElemLVal.flags = 0; 6055 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 6056 if (DelayedCreation) { 6057 CGF.EmitStoreOfScalar( 6058 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true), 6059 FlagsLVal); 6060 } else 6061 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType()); 6062 } 6063 // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void 6064 // *data); 6065 llvm::Value *Args[] = { 6066 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6067 /*isSigned=*/true), 6068 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6069 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 6070 CGM.VoidPtrTy)}; 6071 return CGF.EmitRuntimeCall( 6072 createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args); 6073 } 6074 6075 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 6076 SourceLocation Loc, 6077 ReductionCodeGen &RCG, 6078 unsigned N) { 6079 auto Sizes = RCG.getSizes(N); 6080 // Emit threadprivate global variable if the type is non-constant 6081 // (Sizes.second = nullptr). 
6082 if (Sizes.second) { 6083 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6084 /*isSigned=*/false); 6085 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6086 CGF, CGM.getContext().getSizeType(), 6087 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6088 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6089 } 6090 // Store address of the original reduction item if custom initializer is used. 6091 if (RCG.usesReductionInitializer(N)) { 6092 Address SharedAddr = getAddrOfArtificialThreadPrivate( 6093 CGF, CGM.getContext().VoidPtrTy, 6094 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 6095 CGF.Builder.CreateStore( 6096 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6097 RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy), 6098 SharedAddr, /*IsVolatile=*/false); 6099 } 6100 } 6101 6102 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6103 SourceLocation Loc, 6104 llvm::Value *ReductionsPtr, 6105 LValue SharedLVal) { 6106 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6107 // *d); 6108 llvm::Value *Args[] = { 6109 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6110 /*isSigned=*/true), 6111 ReductionsPtr, 6112 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(), 6113 CGM.VoidPtrTy)}; 6114 return Address( 6115 CGF.EmitRuntimeCall( 6116 createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), 6117 SharedLVal.getAlignment()); 6118 } 6119 6120 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6121 SourceLocation Loc) { 6122 if (!CGF.HaveInsertPoint()) 6123 return; 6124 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6125 // global_tid); 6126 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 6127 // Ignore return result until untied tasks are supported. 
6128 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 6129 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6130 Region->emitUntiedSwitch(CGF); 6131 } 6132 6133 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6134 OpenMPDirectiveKind InnerKind, 6135 const RegionCodeGenTy &CodeGen, 6136 bool HasCancel) { 6137 if (!CGF.HaveInsertPoint()) 6138 return; 6139 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 6140 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6141 } 6142 6143 namespace { 6144 enum RTCancelKind { 6145 CancelNoreq = 0, 6146 CancelParallel = 1, 6147 CancelLoop = 2, 6148 CancelSections = 3, 6149 CancelTaskgroup = 4 6150 }; 6151 } // anonymous namespace 6152 6153 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6154 RTCancelKind CancelKind = CancelNoreq; 6155 if (CancelRegion == OMPD_parallel) 6156 CancelKind = CancelParallel; 6157 else if (CancelRegion == OMPD_for) 6158 CancelKind = CancelLoop; 6159 else if (CancelRegion == OMPD_sections) 6160 CancelKind = CancelSections; 6161 else { 6162 assert(CancelRegion == OMPD_taskgroup); 6163 CancelKind = CancelTaskgroup; 6164 } 6165 return CancelKind; 6166 } 6167 6168 void CGOpenMPRuntime::emitCancellationPointCall( 6169 CodeGenFunction &CGF, SourceLocation Loc, 6170 OpenMPDirectiveKind CancelRegion) { 6171 if (!CGF.HaveInsertPoint()) 6172 return; 6173 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6174 // global_tid, kmp_int32 cncl_kind); 6175 if (auto *OMPRegionInfo = 6176 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6177 // For 'cancellation point taskgroup', the task region info may not have a 6178 // cancel. This may instead happen in another adjacent task. 
6179 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6180 llvm::Value *Args[] = { 6181 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6182 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6183 // Ignore return result until untied tasks are supported. 6184 llvm::Value *Result = CGF.EmitRuntimeCall( 6185 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 6186 // if (__kmpc_cancellationpoint()) { 6187 // exit from construct; 6188 // } 6189 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6190 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6191 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6192 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6193 CGF.EmitBlock(ExitBB); 6194 // exit from construct; 6195 CodeGenFunction::JumpDest CancelDest = 6196 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6197 CGF.EmitBranchThroughCleanup(CancelDest); 6198 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6199 } 6200 } 6201 } 6202 6203 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6204 const Expr *IfCond, 6205 OpenMPDirectiveKind CancelRegion) { 6206 if (!CGF.HaveInsertPoint()) 6207 return; 6208 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6209 // kmp_int32 cncl_kind); 6210 if (auto *OMPRegionInfo = 6211 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6212 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 6213 PrePostActionTy &) { 6214 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6215 llvm::Value *Args[] = { 6216 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6217 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6218 // Ignore return result until untied tasks are supported. 
6219 llvm::Value *Result = CGF.EmitRuntimeCall( 6220 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 6221 // if (__kmpc_cancel()) { 6222 // exit from construct; 6223 // } 6224 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6225 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6226 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6227 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6228 CGF.EmitBlock(ExitBB); 6229 // exit from construct; 6230 CodeGenFunction::JumpDest CancelDest = 6231 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6232 CGF.EmitBranchThroughCleanup(CancelDest); 6233 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6234 }; 6235 if (IfCond) { 6236 emitOMPIfClause(CGF, IfCond, ThenGen, 6237 [](CodeGenFunction &, PrePostActionTy &) {}); 6238 } else { 6239 RegionCodeGenTy ThenRCG(ThenGen); 6240 ThenRCG(CGF); 6241 } 6242 } 6243 } 6244 6245 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6246 const OMPExecutableDirective &D, StringRef ParentName, 6247 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6248 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6249 assert(!ParentName.empty() && "Invalid target region parent name!"); 6250 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6251 IsOffloadEntry, CodeGen); 6252 } 6253 6254 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6255 const OMPExecutableDirective &D, StringRef ParentName, 6256 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6257 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6258 // Create a unique name for the entry function using the source location 6259 // information of the current target region. 
The name will be something like: 6260 // 6261 // __omp_offloading_DD_FFFF_PP_lBB 6262 // 6263 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6264 // mangled name of the function that encloses the target region and BB is the 6265 // line number of the target region. 6266 6267 unsigned DeviceID; 6268 unsigned FileID; 6269 unsigned Line; 6270 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6271 Line); 6272 SmallString<64> EntryFnName; 6273 { 6274 llvm::raw_svector_ostream OS(EntryFnName); 6275 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6276 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6277 } 6278 6279 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6280 6281 CodeGenFunction CGF(CGM, true); 6282 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6283 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6284 6285 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); 6286 6287 // If this target outline function is not an offload entry, we don't need to 6288 // register it. 6289 if (!IsOffloadEntry) 6290 return; 6291 6292 // The target region ID is used by the runtime library to identify the current 6293 // target region, so it only has to be unique and not necessarily point to 6294 // anything. It could be the pointer to the outlined function that implements 6295 // the target region, but we aren't using that so that the compiler doesn't 6296 // need to keep that, and could therefore inline the host function if proven 6297 // worthwhile during optimization. In the other hand, if emitting code for the 6298 // device, the ID has to be the function address so that it can retrieved from 6299 // the offloading entry and launched by the runtime library. We also mark the 6300 // outlined function to have external linkage in case we are emitting code for 6301 // the device, because these functions will be entry points to the device. 
6302 6303 if (CGM.getLangOpts().OpenMPIsDevice) { 6304 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6305 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6306 OutlinedFn->setDSOLocal(false); 6307 } else { 6308 std::string Name = getName({EntryFnName, "region_id"}); 6309 OutlinedFnID = new llvm::GlobalVariable( 6310 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6311 llvm::GlobalValue::WeakAnyLinkage, 6312 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6313 } 6314 6315 // Register the information for the entry associated with this target region. 6316 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6317 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6318 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6319 } 6320 6321 /// discard all CompoundStmts intervening between two constructs 6322 static const Stmt *ignoreCompoundStmts(const Stmt *Body) { 6323 while (const auto *CS = dyn_cast_or_null<CompoundStmt>(Body)) 6324 Body = CS->body_front(); 6325 6326 return Body; 6327 } 6328 6329 /// Emit the number of teams for a target directive. Inspect the num_teams 6330 /// clause associated with a teams construct combined or closely nested 6331 /// with the target directive. 6332 /// 6333 /// Emit a team of size one for directives such as 'target parallel' that 6334 /// have no associated teams construct. 6335 /// 6336 /// Otherwise, return nullptr. 6337 static llvm::Value * 6338 emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, 6339 CodeGenFunction &CGF, 6340 const OMPExecutableDirective &D) { 6341 assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " 6342 "teams directive expected to be " 6343 "emitted only for the host!"); 6344 6345 CGBuilderTy &Bld = CGF.Builder; 6346 6347 // If the target directive is combined with a teams directive: 6348 // Return the value in the num_teams clause, if any. 6349 // Otherwise, return 0 to denote the runtime default. 
6350 if (isOpenMPTeamsDirective(D.getDirectiveKind())) { 6351 if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) { 6352 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 6353 llvm::Value *NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(), 6354 /*IgnoreResultAssign*/ true); 6355 return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, 6356 /*IsSigned=*/true); 6357 } 6358 6359 // The default value is 0. 6360 return Bld.getInt32(0); 6361 } 6362 6363 // If the target directive is combined with a parallel directive but not a 6364 // teams directive, start one team. 6365 if (isOpenMPParallelDirective(D.getDirectiveKind())) 6366 return Bld.getInt32(1); 6367 6368 // If the current target region has a teams region enclosed, we need to get 6369 // the number of teams to pass to the runtime function call. This is done 6370 // by generating the expression in a inlined region. This is required because 6371 // the expression is captured in the enclosing target environment when the 6372 // teams directive is not combined with target. 6373 6374 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6375 6376 if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>( 6377 ignoreCompoundStmts(CS.getCapturedStmt()))) { 6378 if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) { 6379 if (const auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { 6380 CGOpenMPInnerExprInfo CGInfo(CGF, CS); 6381 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6382 llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); 6383 return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, 6384 /*IsSigned=*/true); 6385 } 6386 6387 // If we have an enclosed teams directive but no num_teams clause we use 6388 // the default value 0. 6389 return Bld.getInt32(0); 6390 } 6391 } 6392 6393 // No teams associated with the directive. 6394 return nullptr; 6395 } 6396 6397 /// Emit the number of threads for a target directive. 
Inspect the 6398 /// thread_limit clause associated with a teams construct combined or closely 6399 /// nested with the target directive. 6400 /// 6401 /// Emit the num_threads clause for directives such as 'target parallel' that 6402 /// have no associated teams construct. 6403 /// 6404 /// Otherwise, return nullptr. 6405 static llvm::Value * 6406 emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, 6407 CodeGenFunction &CGF, 6408 const OMPExecutableDirective &D) { 6409 assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " 6410 "teams directive expected to be " 6411 "emitted only for the host!"); 6412 6413 CGBuilderTy &Bld = CGF.Builder; 6414 6415 // 6416 // If the target directive is combined with a teams directive: 6417 // Return the value in the thread_limit clause, if any. 6418 // 6419 // If the target directive is combined with a parallel directive: 6420 // Return the value in the num_threads clause, if any. 6421 // 6422 // If both clauses are set, select the minimum of the two. 6423 // 6424 // If neither teams or parallel combined directives set the number of threads 6425 // in a team, return 0 to denote the runtime default. 6426 // 6427 // If this is not a teams directive return nullptr. 
6428 6429 if (isOpenMPTeamsDirective(D.getDirectiveKind()) || 6430 isOpenMPParallelDirective(D.getDirectiveKind())) { 6431 llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0); 6432 llvm::Value *NumThreadsVal = nullptr; 6433 llvm::Value *ThreadLimitVal = nullptr; 6434 6435 if (const auto *ThreadLimitClause = 6436 D.getSingleClause<OMPThreadLimitClause>()) { 6437 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6438 llvm::Value *ThreadLimit = 6439 CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(), 6440 /*IgnoreResultAssign*/ true); 6441 ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, 6442 /*IsSigned=*/true); 6443 } 6444 6445 if (const auto *NumThreadsClause = 6446 D.getSingleClause<OMPNumThreadsClause>()) { 6447 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6448 llvm::Value *NumThreads = 6449 CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), 6450 /*IgnoreResultAssign*/ true); 6451 NumThreadsVal = 6452 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true); 6453 } 6454 6455 // Select the lesser of thread_limit and num_threads. 6456 if (NumThreadsVal) 6457 ThreadLimitVal = ThreadLimitVal 6458 ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal, 6459 ThreadLimitVal), 6460 NumThreadsVal, ThreadLimitVal) 6461 : NumThreadsVal; 6462 6463 // Set default value passed to the runtime if either teams or a target 6464 // parallel type directive is found but no clause is specified. 6465 if (!ThreadLimitVal) 6466 ThreadLimitVal = DefaultThreadLimitVal; 6467 6468 return ThreadLimitVal; 6469 } 6470 6471 // If the current target region has a teams region enclosed, we need to get 6472 // the thread limit to pass to the runtime function call. This is done 6473 // by generating the expression in a inlined region. This is required because 6474 // the expression is captured in the enclosing target environment when the 6475 // teams directive is not combined with target. 
6476 6477 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6478 6479 if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>( 6480 ignoreCompoundStmts(CS.getCapturedStmt()))) { 6481 if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) { 6482 if (const auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { 6483 CGOpenMPInnerExprInfo CGInfo(CGF, CS); 6484 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6485 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); 6486 return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, 6487 /*IsSigned=*/true); 6488 } 6489 6490 // If we have an enclosed teams directive but no thread_limit clause we 6491 // use the default value 0. 6492 return CGF.Builder.getInt32(0); 6493 } 6494 } 6495 6496 // No teams associated with the directive. 6497 return nullptr; 6498 } 6499 6500 namespace { 6501 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 6502 6503 // Utility to handle information from clauses associated with a given 6504 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 6505 // It provides a convenient interface to obtain the information and generate 6506 // code for that information. 6507 class MappableExprsHandler { 6508 public: 6509 /// Values for bit flags used to specify the mapping type for 6510 /// offloading. 6511 enum OpenMPOffloadMappingFlags : uint64_t { 6512 /// No flags 6513 OMP_MAP_NONE = 0x0, 6514 /// Allocate memory on the device and move data from host to device. 6515 OMP_MAP_TO = 0x01, 6516 /// Allocate memory on the device and move data from device to host. 6517 OMP_MAP_FROM = 0x02, 6518 /// Always perform the requested mapping action on the element, even 6519 /// if it was already mapped before. 6520 OMP_MAP_ALWAYS = 0x04, 6521 /// Delete the element from the device environment, ignoring the 6522 /// current reference count associated with the element. 
6523 OMP_MAP_DELETE = 0x08, 6524 /// The element being mapped is a pointer-pointee pair; both the 6525 /// pointer and the pointee should be mapped. 6526 OMP_MAP_PTR_AND_OBJ = 0x10, 6527 /// This flags signals that the base address of an entry should be 6528 /// passed to the target kernel as an argument. 6529 OMP_MAP_TARGET_PARAM = 0x20, 6530 /// Signal that the runtime library has to return the device pointer 6531 /// in the current position for the data being mapped. Used when we have the 6532 /// use_device_ptr clause. 6533 OMP_MAP_RETURN_PARAM = 0x40, 6534 /// This flag signals that the reference being passed is a pointer to 6535 /// private data. 6536 OMP_MAP_PRIVATE = 0x80, 6537 /// Pass the element to the device by value. 6538 OMP_MAP_LITERAL = 0x100, 6539 /// Implicit map 6540 OMP_MAP_IMPLICIT = 0x200, 6541 /// The 16 MSBs of the flags indicate whether the entry is member of some 6542 /// struct/class. 6543 OMP_MAP_MEMBER_OF = 0xffff000000000000, 6544 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 6545 }; 6546 6547 /// Class that associates information with a base pointer to be passed to the 6548 /// runtime library. 6549 class BasePointerInfo { 6550 /// The base pointer. 6551 llvm::Value *Ptr = nullptr; 6552 /// The base declaration that refers to this device pointer, or null if 6553 /// there is none. 
6554 const ValueDecl *DevPtrDecl = nullptr; 6555 6556 public: 6557 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 6558 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 6559 llvm::Value *operator*() const { return Ptr; } 6560 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 6561 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 6562 }; 6563 6564 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 6565 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 6566 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 6567 6568 /// Map between a struct and the its lowest & highest elements which have been 6569 /// mapped. 6570 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 6571 /// HE(FieldIndex, Pointer)} 6572 struct StructRangeInfoTy { 6573 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 6574 0, Address::invalid()}; 6575 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 6576 0, Address::invalid()}; 6577 Address Base = Address::invalid(); 6578 }; 6579 6580 private: 6581 /// Kind that defines how a device pointer has to be returned. 
6582 struct MapInfo { 6583 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 6584 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 6585 OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown; 6586 bool ReturnDevicePointer = false; 6587 bool IsImplicit = false; 6588 6589 MapInfo() = default; 6590 MapInfo( 6591 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 6592 OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, 6593 bool ReturnDevicePointer, bool IsImplicit) 6594 : Components(Components), MapType(MapType), 6595 MapTypeModifier(MapTypeModifier), 6596 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} 6597 }; 6598 6599 /// If use_device_ptr is used on a pointer which is a struct member and there 6600 /// is no map information about it, then emission of that entry is deferred 6601 /// until the whole struct has been processed. 6602 struct DeferredDevicePtrEntryTy { 6603 const Expr *IE = nullptr; 6604 const ValueDecl *VD = nullptr; 6605 6606 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD) 6607 : IE(IE), VD(VD) {} 6608 }; 6609 6610 /// Directive from where the map clauses were extracted. 6611 const OMPExecutableDirective &CurDir; 6612 6613 /// Function the directive is being generated for. 6614 CodeGenFunction &CGF; 6615 6616 /// Set of all first private variables in the current directive. 6617 llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls; 6618 6619 /// Map between device pointer declarations and their expression components. 6620 /// The key value for declarations in 'this' is null. 6621 llvm::DenseMap< 6622 const ValueDecl *, 6623 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 6624 DevPointersMap; 6625 6626 llvm::Value *getExprTypeSize(const Expr *E) const { 6627 QualType ExprTy = E->getType().getCanonicalType(); 6628 6629 // Reference types are ignored for mapping purposes. 
6630 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 6631 ExprTy = RefTy->getPointeeType().getCanonicalType(); 6632 6633 // Given that an array section is considered a built-in type, we need to 6634 // do the calculation based on the length of the section instead of relying 6635 // on CGF.getTypeSize(E->getType()). 6636 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 6637 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 6638 OAE->getBase()->IgnoreParenImpCasts()) 6639 .getCanonicalType(); 6640 6641 // If there is no length associated with the expression, that means we 6642 // are using the whole length of the base. 6643 if (!OAE->getLength() && OAE->getColonLoc().isValid()) 6644 return CGF.getTypeSize(BaseTy); 6645 6646 llvm::Value *ElemSize; 6647 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 6648 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 6649 } else { 6650 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 6651 assert(ATy && "Expecting array type if not a pointer type."); 6652 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 6653 } 6654 6655 // If we don't have a length at this point, that is because we have an 6656 // array section with a single element. 6657 if (!OAE->getLength()) 6658 return ElemSize; 6659 6660 llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); 6661 LengthVal = 6662 CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); 6663 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 6664 } 6665 return CGF.getTypeSize(ExprTy); 6666 } 6667 6668 /// Return the corresponding bits for a given map clause modifier. Add 6669 /// a flag marking the map as a pointer if requested. Add a flag marking the 6670 /// map as the first one of a series of maps that relate to the same map 6671 /// expression. 
6672 OpenMPOffloadMappingFlags getMapTypeBits(OpenMPMapClauseKind MapType, 6673 OpenMPMapClauseKind MapTypeModifier, 6674 bool IsImplicit, bool AddPtrFlag, 6675 bool AddIsTargetParamFlag) const { 6676 OpenMPOffloadMappingFlags Bits = 6677 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 6678 switch (MapType) { 6679 case OMPC_MAP_alloc: 6680 case OMPC_MAP_release: 6681 // alloc and release is the default behavior in the runtime library, i.e. 6682 // if we don't pass any bits alloc/release that is what the runtime is 6683 // going to do. Therefore, we don't need to signal anything for these two 6684 // type modifiers. 6685 break; 6686 case OMPC_MAP_to: 6687 Bits |= OMP_MAP_TO; 6688 break; 6689 case OMPC_MAP_from: 6690 Bits |= OMP_MAP_FROM; 6691 break; 6692 case OMPC_MAP_tofrom: 6693 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 6694 break; 6695 case OMPC_MAP_delete: 6696 Bits |= OMP_MAP_DELETE; 6697 break; 6698 case OMPC_MAP_always: 6699 case OMPC_MAP_unknown: 6700 llvm_unreachable("Unexpected map type!"); 6701 } 6702 if (AddPtrFlag) 6703 Bits |= OMP_MAP_PTR_AND_OBJ; 6704 if (AddIsTargetParamFlag) 6705 Bits |= OMP_MAP_TARGET_PARAM; 6706 if (MapTypeModifier == OMPC_MAP_always) 6707 Bits |= OMP_MAP_ALWAYS; 6708 return Bits; 6709 } 6710 6711 /// Return true if the provided expression is a final array section. A 6712 /// final array section, is one whose length can't be proved to be one. 6713 bool isFinalArraySectionExpression(const Expr *E) const { 6714 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 6715 6716 // It is not an array section and therefore not a unity-size one. 6717 if (!OASE) 6718 return false; 6719 6720 // An array section with no colon always refer to a single element. 6721 if (OASE->getColonLoc().isInvalid()) 6722 return false; 6723 6724 const Expr *Length = OASE->getLength(); 6725 6726 // If we don't have a length we have to check if the array has size 1 6727 // for this dimension. Also, we should always expect a length if the 6728 // base type is pointer. 
6729 if (!Length) { 6730 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 6731 OASE->getBase()->IgnoreParenImpCasts()) 6732 .getCanonicalType(); 6733 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 6734 return ATy->getSize().getSExtValue() != 1; 6735 // If we don't have a constant dimension length, we have to consider 6736 // the current section as having any size, so it is not necessarily 6737 // unitary. If it happen to be unity size, that's user fault. 6738 return true; 6739 } 6740 6741 // Check if the length evaluates to 1. 6742 llvm::APSInt ConstLength; 6743 if (!Length->EvaluateAsInt(ConstLength, CGF.getContext())) 6744 return true; // Can have more that size 1. 6745 6746 return ConstLength.getSExtValue() != 1; 6747 } 6748 6749 /// Generate the base pointers, section pointers, sizes and map type 6750 /// bits for the provided map type, map modifier, and expression components. 6751 /// \a IsFirstComponent should be set to true if the provided set of 6752 /// components is the first associated with a capture. 6753 void generateInfoForComponentList( 6754 OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, 6755 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 6756 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 6757 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 6758 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, 6759 bool IsImplicit) const { 6760 // The following summarizes what has to be generated for each map and the 6761 // types below. The generated information is expressed in this order: 6762 // base pointer, section pointer, size, flags 6763 // (to add to the ones that come from the map type and modifier). 
6764 // 6765 // double d; 6766 // int i[100]; 6767 // float *p; 6768 // 6769 // struct S1 { 6770 // int i; 6771 // float f[50]; 6772 // } 6773 // struct S2 { 6774 // int i; 6775 // float f[50]; 6776 // S1 s; 6777 // double *p; 6778 // struct S2 *ps; 6779 // } 6780 // S2 s; 6781 // S2 *ps; 6782 // 6783 // map(d) 6784 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 6785 // 6786 // map(i) 6787 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 6788 // 6789 // map(i[1:23]) 6790 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 6791 // 6792 // map(p) 6793 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 6794 // 6795 // map(p[1:24]) 6796 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 6797 // 6798 // map(s) 6799 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 6800 // 6801 // map(s.i) 6802 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 6803 // 6804 // map(s.s.f) 6805 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 6806 // 6807 // map(s.p) 6808 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 6809 // 6810 // map(to: s.p[:22]) 6811 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 6812 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 6813 // &(s.p), &(s.p[0]), 22*sizeof(double), 6814 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 6815 // (*) alloc space for struct members, only this is a target parameter 6816 // (**) map the pointer (nothing to be mapped in this example) (the compiler 6817 // optimizes this entry out, same in the examples below) 6818 // (***) map the pointee (map: to) 6819 // 6820 // map(s.ps) 6821 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 6822 // 6823 // map(from: s.ps->s.i) 6824 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 6825 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 6826 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 6827 // 6828 // map(to: s.ps->ps) 6829 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 6830 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 6831 // &(s.ps), 
&(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 6832 // 6833 // map(s.ps->ps->ps) 6834 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 6835 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 6836 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 6837 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 6838 // 6839 // map(to: s.ps->ps->s.f[:22]) 6840 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 6841 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 6842 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 6843 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 6844 // 6845 // map(ps) 6846 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 6847 // 6848 // map(ps->i) 6849 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 6850 // 6851 // map(ps->s.f) 6852 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 6853 // 6854 // map(from: ps->p) 6855 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 6856 // 6857 // map(to: ps->p[:22]) 6858 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 6859 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 6860 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 6861 // 6862 // map(ps->ps) 6863 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 6864 // 6865 // map(from: ps->ps->s.i) 6866 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 6867 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 6868 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 6869 // 6870 // map(from: ps->ps->ps) 6871 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 6872 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 6873 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 6874 // 6875 // map(ps->ps->ps->ps) 6876 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 6877 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 6878 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 6879 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | 
TO | FROM 6880 // 6881 // map(to: ps->ps->ps->s.f[:22]) 6882 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 6883 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 6884 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 6885 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 6886 // 6887 // map(to: s.f[:22]) map(from: s.p[:33]) 6888 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 6889 // sizeof(double*) (**), TARGET_PARAM 6890 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 6891 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 6892 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 6893 // (*) allocate contiguous space needed to fit all mapped members even if 6894 // we allocate space for members not mapped (in this example, 6895 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 6896 // them as well because they fall between &s.f[0] and &s.p) 6897 // 6898 // map(from: s.f[:22]) map(to: ps->p[:33]) 6899 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 6900 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 6901 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 6902 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 6903 // (*) the struct this entry pertains to is the 2nd element in the list of 6904 // arguments, hence MEMBER_OF(2) 6905 // 6906 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 6907 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 6908 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 6909 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 6910 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 6911 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 6912 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 6913 // (*) the struct this entry pertains to is the 4th element in the list 6914 // of arguments, hence MEMBER_OF(4) 6915 6916 // Track if the map information being generated is the first for a 
capture. 6917 bool IsCaptureFirstInfo = IsFirstComponentList; 6918 bool IsLink = false; // Is this variable a "declare target link"? 6919 6920 // Scan the components from the base to the complete expression. 6921 auto CI = Components.rbegin(); 6922 auto CE = Components.rend(); 6923 auto I = CI; 6924 6925 // Track if the map information being generated is the first for a list of 6926 // components. 6927 bool IsExpressionFirstInfo = true; 6928 Address BP = Address::invalid(); 6929 6930 if (isa<MemberExpr>(I->getAssociatedExpression())) { 6931 // The base is the 'this' pointer. The content of the pointer is going 6932 // to be the base of the field being mapped. 6933 BP = CGF.LoadCXXThisAddress(); 6934 } else { 6935 // The base is the reference to the variable. 6936 // BP = &Var. 6937 BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress(); 6938 if (const auto *VD = 6939 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 6940 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 6941 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) 6942 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) { 6943 IsLink = true; 6944 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); 6945 } 6946 } 6947 6948 // If the variable is a pointer and is being dereferenced (i.e. is not 6949 // the last component), the base has to be the pointer itself, not its 6950 // reference. References are ignored for mapping purposes. 6951 QualType Ty = 6952 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 6953 if (Ty->isAnyPointerType() && std::next(I) != CE) { 6954 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 6955 6956 // We do not need to generate individual map information for the 6957 // pointer, it can be associated with the combined storage. 6958 ++I; 6959 } 6960 } 6961 6962 // Track whether a component of the list should be marked as MEMBER_OF some 6963 // combined entry (for partial structs). 
Only the first PTR_AND_OBJ entry 6964 // in a component list should be marked as MEMBER_OF, all subsequent entries 6965 // do not belong to the base struct. E.g. 6966 // struct S2 s; 6967 // s.ps->ps->ps->f[:] 6968 // (1) (2) (3) (4) 6969 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 6970 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 6971 // is the pointee of ps(2) which is not member of struct s, so it should not 6972 // be marked as such (it is still PTR_AND_OBJ). 6973 // The variable is initialized to false so that PTR_AND_OBJ entries which 6974 // are not struct members are not considered (e.g. array of pointers to 6975 // data). 6976 bool ShouldBeMemberOf = false; 6977 6978 // Variable keeping track of whether or not we have encountered a component 6979 // in the component list which is a member expression. Useful when we have a 6980 // pointer or a final array section, in which case it is the previous 6981 // component in the list which tells us whether we have a member expression. 6982 // E.g. X.f[:] 6983 // While processing the final array section "[:]" it is "f" which tells us 6984 // whether we are dealing with a member of a declared struct. 6985 const MemberExpr *EncounteredME = nullptr; 6986 6987 for (; I != CE; ++I) { 6988 // If the current component is member of a struct (parent struct) mark it. 6989 if (!EncounteredME) { 6990 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 6991 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 6992 // as MEMBER_OF the parent struct. 6993 if (EncounteredME) 6994 ShouldBeMemberOf = true; 6995 } 6996 6997 auto Next = std::next(I); 6998 6999 // We need to generate the addresses and sizes if this is the last 7000 // component, if the component is a pointer or if it is an array section 7001 // whose length can't be proved to be one. If this is a pointer, it 7002 // becomes the base address for the following components. 
7003 7004 // A final array section, is one whose length can't be proved to be one. 7005 bool IsFinalArraySection = 7006 isFinalArraySectionExpression(I->getAssociatedExpression()); 7007 7008 // Get information on whether the element is a pointer. Have to do a 7009 // special treatment for array sections given that they are built-in 7010 // types. 7011 const auto *OASE = 7012 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7013 bool IsPointer = 7014 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7015 .getCanonicalType() 7016 ->isAnyPointerType()) || 7017 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7018 7019 if (Next == CE || IsPointer || IsFinalArraySection) { 7020 // If this is not the last component, we expect the pointer to be 7021 // associated with an array expression or member expression. 7022 assert((Next == CE || 7023 isa<MemberExpr>(Next->getAssociatedExpression()) || 7024 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7025 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && 7026 "Unexpected expression"); 7027 7028 Address LB = 7029 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress(); 7030 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7031 7032 // If this component is a pointer inside the base struct then we don't 7033 // need to create any entry for it - it will be combined with the object 7034 // it is pointing to into a single PTR_AND_OBJ entry. 7035 bool IsMemberPointer = 7036 IsPointer && EncounteredME && 7037 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7038 EncounteredME); 7039 if (!IsMemberPointer) { 7040 BasePointers.push_back(BP.getPointer()); 7041 Pointers.push_back(LB.getPointer()); 7042 Sizes.push_back(Size); 7043 7044 // We need to add a pointer flag for each map that comes from the 7045 // same expression except for the first one. 
We also need to signal 7046 // this map is the first one that relates with the current capture 7047 // (there is a set of entries for each capture). 7048 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7049 MapType, MapTypeModifier, IsImplicit, 7050 !IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink); 7051 7052 if (!IsExpressionFirstInfo) { 7053 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7054 // then we reset the TO/FROM/ALWAYS/DELETE flags. 7055 if (IsPointer) 7056 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7057 OMP_MAP_DELETE); 7058 7059 if (ShouldBeMemberOf) { 7060 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7061 // should be later updated with the correct value of MEMBER_OF. 7062 Flags |= OMP_MAP_MEMBER_OF; 7063 // From now on, all subsequent PTR_AND_OBJ entries should not be 7064 // marked as MEMBER_OF. 7065 ShouldBeMemberOf = false; 7066 } 7067 } 7068 7069 Types.push_back(Flags); 7070 } 7071 7072 // If we have encountered a member expression so far, keep track of the 7073 // mapped member. If the parent is "*this", then the value declaration 7074 // is nullptr. 7075 if (EncounteredME) { 7076 const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl()); 7077 unsigned FieldIndex = FD->getFieldIndex(); 7078 7079 // Update info about the lowest and highest elements for this struct 7080 if (!PartialStruct.Base.isValid()) { 7081 PartialStruct.LowestElem = {FieldIndex, LB}; 7082 PartialStruct.HighestElem = {FieldIndex, LB}; 7083 PartialStruct.Base = BP; 7084 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7085 PartialStruct.LowestElem = {FieldIndex, LB}; 7086 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7087 PartialStruct.HighestElem = {FieldIndex, LB}; 7088 } 7089 } 7090 7091 // If we have a final array section, we are done with this expression. 7092 if (IsFinalArraySection) 7093 break; 7094 7095 // The pointer becomes the base for the next element. 
7096 if (Next != CE) 7097 BP = LB; 7098 7099 IsExpressionFirstInfo = false; 7100 IsCaptureFirstInfo = false; 7101 } 7102 } 7103 } 7104 7105 /// Return the adjusted map modifiers if the declaration a capture refers to 7106 /// appears in a first-private clause. This is expected to be used only with 7107 /// directives that start with 'target'. 7108 MappableExprsHandler::OpenMPOffloadMappingFlags 7109 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 7110 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 7111 7112 // A first private variable captured by reference will use only the 7113 // 'private ptr' and 'map to' flag. Return the right flags if the captured 7114 // declaration is known as first-private in this handler. 7115 if (FirstPrivateDecls.count(Cap.getCapturedVar())) 7116 return MappableExprsHandler::OMP_MAP_PRIVATE | 7117 MappableExprsHandler::OMP_MAP_TO; 7118 return MappableExprsHandler::OMP_MAP_TO | 7119 MappableExprsHandler::OMP_MAP_FROM; 7120 } 7121 7122 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 7123 // Member of is given by the 16 MSB of the flag, so rotate by 48 bits. 7124 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 7125 << 48); 7126 } 7127 7128 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 7129 OpenMPOffloadMappingFlags MemberOfFlag) { 7130 // If the entry is PTR_AND_OBJ but has not been marked with the special 7131 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 7132 // marked as MEMBER_OF. 7133 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 7134 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 7135 return; 7136 7137 // Reset the placeholder value to prepare the flag for the assignment of the 7138 // proper MEMBER_OF value. 
7139 Flags &= ~OMP_MAP_MEMBER_OF; 7140 Flags |= MemberOfFlag; 7141 } 7142 7143 public: 7144 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 7145 : CurDir(Dir), CGF(CGF) { 7146 // Extract firstprivate clause information. 7147 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 7148 for (const auto *D : C->varlists()) 7149 FirstPrivateDecls.insert( 7150 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); 7151 // Extract device pointer clause information. 7152 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 7153 for (auto L : C->component_lists()) 7154 DevPointersMap[L.first].push_back(L.second); 7155 } 7156 7157 /// Generate code for the combined entry if we have a partially mapped struct 7158 /// and take care of the mapping flags of the arguments corresponding to 7159 /// individual struct members. 7160 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 7161 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7162 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 7163 const StructRangeInfoTy &PartialStruct) const { 7164 // Base is the base of the struct 7165 BasePointers.push_back(PartialStruct.Base.getPointer()); 7166 // Pointer is the address of the lowest element 7167 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 7168 Pointers.push_back(LB); 7169 // Size is (addr of {highest+1} element) - (addr of lowest element) 7170 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 7171 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 7172 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 7173 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 7174 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 7175 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.SizeTy, 7176 /*isSinged=*/false); 7177 Sizes.push_back(Size); 7178 // Map type is always TARGET_PARAM 7179 
Types.push_back(OMP_MAP_TARGET_PARAM);
    // Remove TARGET_PARAM flag from the first element: the combined entry
    // pushed above carries it instead.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    // The combined entry was just appended, so its index is size() - 1.
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions of the map, to, from and
  /// use_device_ptr clauses of the current directive. For each item that
  /// relates with a device pointer, the relevant declaration is recorded on
  /// the corresponding base pointer entry and the entry is flagged with
  /// RETURN_PARAM.
  ///
  /// \param BasePointers Receives the base pointer of every generated entry.
  /// \param Pointers Receives the section pointer of every generated entry.
  /// \param Sizes Receives the size of every generated entry.
  /// \param Types Receives the map-type flags of every generated entry.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. Entries are keyed by the canonical declaration; a null
    // declaration is kept as the nullptr key.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
        bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer,
                            IsImplicit);
    };

    // Collect the component lists of the map, to and from clauses. The 'to'
    // and 'from' clauses are recorded with a fixed map type and no modifier.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C :
        this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto &L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        // FIXME: MSVC 2013 seems to require this-> to find member CGF.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, OMPC_MAP_unknown,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD);
        } else {
          // Non-member pointer: emit the zero-size entry right away, straight
          // into the output arrays, using the loaded pointer value as both
          // base and section pointer.
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    // Emit the entries declaration by declaration, in the insertion order of
    // Info.
    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays. They are filled per declaration so that
      // a combined struct entry can be placed in front of them if needed.
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        // FIXME: MSVC 2013 seems to require this-> to find the member method.
        this->generateInfoForComponentList(
            L.MapType, L.MapTypeModifier, L.Components, CurBasePointers,
            CurPointers, CurSizes, CurTypes, PartialStruct,
            IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          // The first entry generated for this component list is the one
          // that gets tagged.
          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          // The base pointer is the address of the member itself; the section
          // pointer is the value loaded from it.
          llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                             OMP_MAP_MEMBER_OF);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }

  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  ///
  /// \param Cap Capture to process; must not capture a variable array type.
  /// \param Arg Value passed for the capture; used directly when the captured
  /// declaration appears in an is_device_ptr clause.
  /// \param PartialStruct Forwarded to generateInfoForComponentList.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component associated with a capture, because the mapping flags depend
    // on it.
    bool IsFirstComponentList = true;

    // A capture of 'this' is looked up with a null declaration.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
7402 for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) 7403 for (const auto &L : C->decl_component_lists(VD)) { 7404 assert(L.first == VD && 7405 "We got information for the wrong declaration??"); 7406 assert(!L.second.empty() && 7407 "Not expecting declaration with no component lists."); 7408 generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(), 7409 L.second, BasePointers, Pointers, Sizes, 7410 Types, PartialStruct, IsFirstComponentList, 7411 C->isImplicit()); 7412 IsFirstComponentList = false; 7413 } 7414 } 7415 7416 /// Generate the base pointers, section pointers, sizes and map types 7417 /// associated with the declare target link variables. 7418 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers, 7419 MapValuesArrayTy &Pointers, 7420 MapValuesArrayTy &Sizes, 7421 MapFlagsArrayTy &Types) const { 7422 // Map other list items in the map clause which are not captured variables 7423 // but "declare target link" global variables., 7424 for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) { 7425 for (const auto &L : C->component_lists()) { 7426 if (!L.first) 7427 continue; 7428 const auto *VD = dyn_cast<VarDecl>(L.first); 7429 if (!VD) 7430 continue; 7431 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7432 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 7433 if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) 7434 continue; 7435 StructRangeInfoTy PartialStruct; 7436 generateInfoForComponentList( 7437 C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers, 7438 Pointers, Sizes, Types, PartialStruct, 7439 /*IsFirstComponentList=*/true, C->isImplicit()); 7440 assert(!PartialStruct.Base.isValid() && 7441 "No partial structs for declare target link expected."); 7442 } 7443 } 7444 } 7445 7446 /// Generate the default map information for a given capture \a CI, 7447 /// record field declaration \a RI and captured value \a CV. 
7448 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 7449 const FieldDecl &RI, llvm::Value *CV, 7450 MapBaseValuesArrayTy &CurBasePointers, 7451 MapValuesArrayTy &CurPointers, 7452 MapValuesArrayTy &CurSizes, 7453 MapFlagsArrayTy &CurMapTypes) const { 7454 // Do the default mapping. 7455 if (CI.capturesThis()) { 7456 CurBasePointers.push_back(CV); 7457 CurPointers.push_back(CV); 7458 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 7459 CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType())); 7460 // Default map type. 7461 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 7462 } else if (CI.capturesVariableByCopy()) { 7463 CurBasePointers.push_back(CV); 7464 CurPointers.push_back(CV); 7465 if (!RI.getType()->isAnyPointerType()) { 7466 // We have to signal to the runtime captures passed by value that are 7467 // not pointers. 7468 CurMapTypes.push_back(OMP_MAP_LITERAL); 7469 CurSizes.push_back(CGF.getTypeSize(RI.getType())); 7470 } else { 7471 // Pointers are implicitly mapped with a zero size and no flags 7472 // (other than first map that is added for all implicit maps). 7473 CurMapTypes.push_back(OMP_MAP_NONE); 7474 CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy)); 7475 } 7476 } else { 7477 assert(CI.capturesVariable() && "Expected captured reference."); 7478 CurBasePointers.push_back(CV); 7479 CurPointers.push_back(CV); 7480 7481 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 7482 QualType ElementType = PtrTy->getPointeeType(); 7483 CurSizes.push_back(CGF.getTypeSize(ElementType)); 7484 // The default map type for a scalar/complex type is 'to' because by 7485 // default the value doesn't have to be retrieved. For an aggregate 7486 // type, the default is 'tofrom'. 7487 CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); 7488 } 7489 // Every default map produces a single argument which is a target parameter. 
7490 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; 7491 7492 // Add flag stating this is an implicit map. 7493 CurMapTypes.back() |= OMP_MAP_IMPLICIT; 7494 } 7495 }; 7496 7497 enum OpenMPOffloadingReservedDeviceIDs { 7498 /// Device ID if the device was not defined, runtime should get it 7499 /// from environment variables in the spec. 7500 OMP_DEVICEID_UNDEF = -1, 7501 }; 7502 } // anonymous namespace 7503 7504 /// Emit the arrays used to pass the captures and map information to the 7505 /// offloading runtime library. If there is no map or capture information, 7506 /// return nullptr by reference. 7507 static void 7508 emitOffloadingArrays(CodeGenFunction &CGF, 7509 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 7510 MappableExprsHandler::MapValuesArrayTy &Pointers, 7511 MappableExprsHandler::MapValuesArrayTy &Sizes, 7512 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 7513 CGOpenMPRuntime::TargetDataInfo &Info) { 7514 CodeGenModule &CGM = CGF.CGM; 7515 ASTContext &Ctx = CGF.getContext(); 7516 7517 // Reset the array information. 7518 Info.clearArrayInfo(); 7519 Info.NumberOfPtrs = BasePointers.size(); 7520 7521 if (Info.NumberOfPtrs) { 7522 // Detect if we have any capture size requiring runtime evaluation of the 7523 // size so that a constant array could be eventually used. 
bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    // Both pointer arrays are temporaries of type void *[NumberOfPtrs].
    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType =
        Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      // The sizes live in a private, constant, unnamed_addr global.
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    // The flags are copied into 64-bit integers first.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Store every base pointer, section pointer and (if not constant) size
    // into its slot of the corresponding array.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      // Cast the slot address to a pointer to the stored value's type so the
      // value can be stored without converting it.
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember the slot address for entries tied to a device pointer
      // declaration, when the caller asked for that information.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Sizes only need stores when the sizes array is a temporary; the
      // constant global already holds its values.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.SizeTy, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of pointers, sizes and map types.
///
/// When \p Info holds at least one entry, each output argument is set to the
/// address of element 0 of the corresponding array in \p Info; otherwise each
/// is set to a null pointer constant of the matching pointer type.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
  } else {
    // Nothing to map: hand null arrays to the runtime.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
  }
}

/// Emit the code that launches the target region of directive \p D: an
/// offloading runtime call with a host fallback when an outlined function ID
/// is available, or a direct call to the host version otherwise.
///
/// \param OutlinedFn Host version of the target region; must not be null.
/// \param OutlinedFnID ID of the target region; when null, only the host
/// version is emitted.
/// \param IfCond Optional 'if' clause condition selecting between offloading
/// and host execution.
/// \param Device Optional device ID expression.
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Value *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' clause means the target region is emitted through
  // EmitOMPTargetTaskBasedDirective (see the uses of RequiresOuterTask below).
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Generate the captured variables once up front, inside an inlined region.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled in by TargetThenGen and read by
  // ThenGen; both lambdas capture them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask,
                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any. Without a device expression the reserved
    // "undefined" ID is passed instead.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(*this, CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(*this, CGF, D);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    // 'target teams'
    // 'target' / 'teams'
    // 'target teams distribute parallel for'
    // 'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      // No teams information: use the plain target entry point.
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required. Any
    // non-zero return value takes the failure path.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    // NOTE(review): the captured variables are regenerated here, presumably
    // because with an outer task this code is emitted inside the task
    // function where the earlier values are not usable - confirm.
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Offloading path: build the map information for every capture, emit the
  // offloading arrays, then run ThenGen (through a task when required).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);

    // Walk the captures, the fields of the captured record and the captured
    // values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.getTypeSize(RI->getType()));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    // The array fields of Info are passed as the output arguments, so they
    // are overwritten in place with the values to hand to the runtime.
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the results for ThenGen.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen, through a task (with an empty InputInfo)
  // when required.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
7936 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 7937 ParentName, Line)) 7938 return; 7939 7940 switch (E.getDirectiveKind()) { 7941 case OMPD_target: 7942 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 7943 cast<OMPTargetDirective>(E)); 7944 break; 7945 case OMPD_target_parallel: 7946 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 7947 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 7948 break; 7949 case OMPD_target_teams: 7950 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 7951 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 7952 break; 7953 case OMPD_target_teams_distribute: 7954 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 7955 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 7956 break; 7957 case OMPD_target_teams_distribute_simd: 7958 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 7959 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 7960 break; 7961 case OMPD_target_parallel_for: 7962 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 7963 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 7964 break; 7965 case OMPD_target_parallel_for_simd: 7966 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 7967 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 7968 break; 7969 case OMPD_target_simd: 7970 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 7971 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 7972 break; 7973 case OMPD_target_teams_distribute_parallel_for: 7974 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 7975 CGM, ParentName, 7976 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 7977 break; 7978 case OMPD_target_teams_distribute_parallel_for_simd: 7979 CodeGenFunction:: 7980 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 7981 CGM, ParentName, 7982 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 7983 break; 7984 case OMPD_parallel: 
7985 case OMPD_for: 7986 case OMPD_parallel_for: 7987 case OMPD_parallel_sections: 7988 case OMPD_for_simd: 7989 case OMPD_parallel_for_simd: 7990 case OMPD_cancel: 7991 case OMPD_cancellation_point: 7992 case OMPD_ordered: 7993 case OMPD_threadprivate: 7994 case OMPD_task: 7995 case OMPD_simd: 7996 case OMPD_sections: 7997 case OMPD_section: 7998 case OMPD_single: 7999 case OMPD_master: 8000 case OMPD_critical: 8001 case OMPD_taskyield: 8002 case OMPD_barrier: 8003 case OMPD_taskwait: 8004 case OMPD_taskgroup: 8005 case OMPD_atomic: 8006 case OMPD_flush: 8007 case OMPD_teams: 8008 case OMPD_target_data: 8009 case OMPD_target_exit_data: 8010 case OMPD_target_enter_data: 8011 case OMPD_distribute: 8012 case OMPD_distribute_simd: 8013 case OMPD_distribute_parallel_for: 8014 case OMPD_distribute_parallel_for_simd: 8015 case OMPD_teams_distribute: 8016 case OMPD_teams_distribute_simd: 8017 case OMPD_teams_distribute_parallel_for: 8018 case OMPD_teams_distribute_parallel_for_simd: 8019 case OMPD_target_update: 8020 case OMPD_declare_simd: 8021 case OMPD_declare_target: 8022 case OMPD_end_declare_target: 8023 case OMPD_declare_reduction: 8024 case OMPD_taskloop: 8025 case OMPD_taskloop_simd: 8026 case OMPD_unknown: 8027 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 8028 } 8029 return; 8030 } 8031 8032 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 8033 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 8034 return; 8035 8036 scanForTargetRegionsFunctions( 8037 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 8038 return; 8039 } 8040 8041 // If this is a lambda function, look into its body. 8042 if (const auto *L = dyn_cast<LambdaExpr>(S)) 8043 S = L->getBody(); 8044 8045 // Keep looking for target regions recursively. 
8046 for (const Stmt *II : S->children()) 8047 scanForTargetRegionsFunctions(II, ParentName); 8048 } 8049 8050 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 8051 const auto *FD = cast<FunctionDecl>(GD.getDecl()); 8052 8053 // If emitting code for the host, we do not process FD here. Instead we do 8054 // the normal code generation. 8055 if (!CGM.getLangOpts().OpenMPIsDevice) 8056 return false; 8057 8058 // Try to detect target regions in the function. 8059 scanForTargetRegionsFunctions(FD->getBody(), CGM.getMangledName(GD)); 8060 8061 // Do not to emit function if it is not marked as declare target. 8062 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD) && 8063 AlreadyEmittedTargetFunctions.count(FD->getCanonicalDecl()) == 0; 8064 } 8065 8066 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 8067 if (!CGM.getLangOpts().OpenMPIsDevice) 8068 return false; 8069 8070 // Check if there are Ctors/Dtors in this declaration and look for target 8071 // regions in it. We use the complete variant to produce the kernel name 8072 // mangling. 8073 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 8074 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 8075 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 8076 StringRef ParentName = 8077 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 8078 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 8079 } 8080 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 8081 StringRef ParentName = 8082 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 8083 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 8084 } 8085 } 8086 8087 // Do not to emit variable if it is not marked as declare target. 
8088 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 8089 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 8090 cast<VarDecl>(GD.getDecl())); 8091 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) { 8092 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 8093 return true; 8094 } 8095 return false; 8096 } 8097 8098 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 8099 llvm::Constant *Addr) { 8100 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 8101 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 8102 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 8103 StringRef VarName; 8104 CharUnits VarSize; 8105 llvm::GlobalValue::LinkageTypes Linkage; 8106 switch (*Res) { 8107 case OMPDeclareTargetDeclAttr::MT_To: 8108 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 8109 VarName = CGM.getMangledName(VD); 8110 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 8111 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 8112 // Temp solution to prevent optimizations of the internal variables. 
8113 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 8114 std::string RefName = getName({VarName, "ref"}); 8115 if (!CGM.GetGlobalValue(RefName)) { 8116 llvm::Constant *AddrRef = 8117 getOrCreateInternalVariable(Addr->getType(), RefName); 8118 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 8119 GVAddrRef->setConstant(/*Val=*/true); 8120 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 8121 GVAddrRef->setInitializer(Addr); 8122 CGM.addCompilerUsedGlobal(GVAddrRef); 8123 } 8124 } 8125 break; 8126 case OMPDeclareTargetDeclAttr::MT_Link: 8127 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 8128 if (CGM.getLangOpts().OpenMPIsDevice) { 8129 VarName = Addr->getName(); 8130 Addr = nullptr; 8131 } else { 8132 VarName = getAddrOfDeclareTargetLink(VD).getName(); 8133 Addr = 8134 cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer()); 8135 } 8136 VarSize = CGM.getPointerSize(); 8137 Linkage = llvm::GlobalValue::WeakAnyLinkage; 8138 break; 8139 } 8140 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 8141 VarName, Addr, VarSize, Flags, Linkage); 8142 } 8143 } 8144 8145 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 8146 if (isa<FunctionDecl>(GD.getDecl())) 8147 return emitTargetFunctions(GD); 8148 8149 return emitTargetGlobalVariable(GD); 8150 } 8151 8152 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 8153 for (const VarDecl *VD : DeferredGlobalVariables) { 8154 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 8155 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 8156 if (!Res) 8157 continue; 8158 if (*Res == OMPDeclareTargetDeclAttr::MT_To) { 8159 CGM.EmitGlobal(VD); 8160 } else { 8161 assert(*Res == OMPDeclareTargetDeclAttr::MT_Link && 8162 "Expected to or link clauses."); 8163 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); 8164 } 8165 } 8166 } 8167 8168 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 8169 CodeGenModule 
&CGM) 8170 : CGM(CGM) { 8171 if (CGM.getLangOpts().OpenMPIsDevice) { 8172 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 8173 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 8174 } 8175 } 8176 8177 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 8178 if (CGM.getLangOpts().OpenMPIsDevice) 8179 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 8180 } 8181 8182 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 8183 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 8184 return true; 8185 8186 const auto *D = cast<FunctionDecl>(GD.getDecl()); 8187 const FunctionDecl *FD = D->getCanonicalDecl(); 8188 // Do not to emit function if it is marked as declare target as it was already 8189 // emitted. 8190 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 8191 if (D->hasBody() && AlreadyEmittedTargetFunctions.count(FD) == 0) { 8192 if (auto *F = dyn_cast_or_null<llvm::Function>( 8193 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 8194 return !F->isDeclaration(); 8195 return false; 8196 } 8197 return true; 8198 } 8199 8200 return !AlreadyEmittedTargetFunctions.insert(FD).second; 8201 } 8202 8203 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { 8204 // If we have offloading in the current module, we need to emit the entries 8205 // now and register the offloading descriptor. 8206 createOffloadEntriesAndInfoMetadata(); 8207 8208 // Create and register the offloading binary descriptors. This is the main 8209 // entity that captures all the information about offloading in the current 8210 // compilation unit. 
8211 return createOffloadingBinaryDescriptorRegistration(); 8212 } 8213 8214 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 8215 const OMPExecutableDirective &D, 8216 SourceLocation Loc, 8217 llvm::Value *OutlinedFn, 8218 ArrayRef<llvm::Value *> CapturedVars) { 8219 if (!CGF.HaveInsertPoint()) 8220 return; 8221 8222 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 8223 CodeGenFunction::RunCleanupsScope Scope(CGF); 8224 8225 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 8226 llvm::Value *Args[] = { 8227 RTLoc, 8228 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 8229 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 8230 llvm::SmallVector<llvm::Value *, 16> RealArgs; 8231 RealArgs.append(std::begin(Args), std::end(Args)); 8232 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 8233 8234 llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 8235 CGF.EmitRuntimeCall(RTLFn, RealArgs); 8236 } 8237 8238 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 8239 const Expr *NumTeams, 8240 const Expr *ThreadLimit, 8241 SourceLocation Loc) { 8242 if (!CGF.HaveInsertPoint()) 8243 return; 8244 8245 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 8246 8247 llvm::Value *NumTeamsVal = 8248 NumTeams 8249 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 8250 CGF.CGM.Int32Ty, /* isSigned = */ true) 8251 : CGF.Builder.getInt32(0); 8252 8253 llvm::Value *ThreadLimitVal = 8254 ThreadLimit 8255 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 8256 CGF.CGM.Int32Ty, /* isSigned = */ true) 8257 : CGF.Builder.getInt32(0); 8258 8259 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 8260 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 8261 ThreadLimitVal}; 8262 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 8263 PushNumTeamsArgs); 8264 } 8265 8266 void CGOpenMPRuntime::emitTargetDataCalls( 8267 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 8268 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 8269 if (!CGF.HaveInsertPoint()) 8270 return; 8271 8272 // Action used to replace the default codegen action and turn privatization 8273 // off. 8274 PrePostActionTy NoPrivAction; 8275 8276 // Generate the code for the opening of the data environment. Capture all the 8277 // arguments of the runtime call by reference because they are used in the 8278 // closing of the region. 8279 auto &&BeginThenGen = [this, &D, Device, &Info, 8280 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 8281 // Fill up the arrays with all the mapped variables. 8282 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 8283 MappableExprsHandler::MapValuesArrayTy Pointers; 8284 MappableExprsHandler::MapValuesArrayTy Sizes; 8285 MappableExprsHandler::MapFlagsArrayTy MapTypes; 8286 8287 // Get map clause information. 8288 MappableExprsHandler MCHandler(D, CGF); 8289 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 8290 8291 // Fill up the arrays and create the arguments. 
8292 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 8293 8294 llvm::Value *BasePointersArrayArg = nullptr; 8295 llvm::Value *PointersArrayArg = nullptr; 8296 llvm::Value *SizesArrayArg = nullptr; 8297 llvm::Value *MapTypesArrayArg = nullptr; 8298 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 8299 SizesArrayArg, MapTypesArrayArg, Info); 8300 8301 // Emit device ID if any. 8302 llvm::Value *DeviceID = nullptr; 8303 if (Device) { 8304 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 8305 CGF.Int64Ty, /*isSigned=*/true); 8306 } else { 8307 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 8308 } 8309 8310 // Emit the number of elements in the offloading arrays. 8311 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 8312 8313 llvm::Value *OffloadingArgs[] = { 8314 DeviceID, PointerNum, BasePointersArrayArg, 8315 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 8316 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), 8317 OffloadingArgs); 8318 8319 // If device pointer privatization is required, emit the body of the region 8320 // here. It will have to be duplicated: with and without privatization. 8321 if (!Info.CaptureDeviceAddrMap.empty()) 8322 CodeGen(CGF); 8323 }; 8324 8325 // Generate code for the closing of the data region. 8326 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 8327 PrePostActionTy &) { 8328 assert(Info.isValid() && "Invalid data environment closing arguments."); 8329 8330 llvm::Value *BasePointersArrayArg = nullptr; 8331 llvm::Value *PointersArrayArg = nullptr; 8332 llvm::Value *SizesArrayArg = nullptr; 8333 llvm::Value *MapTypesArrayArg = nullptr; 8334 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 8335 SizesArrayArg, MapTypesArrayArg, Info); 8336 8337 // Emit device ID if any. 
8338 llvm::Value *DeviceID = nullptr; 8339 if (Device) { 8340 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 8341 CGF.Int64Ty, /*isSigned=*/true); 8342 } else { 8343 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 8344 } 8345 8346 // Emit the number of elements in the offloading arrays. 8347 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 8348 8349 llvm::Value *OffloadingArgs[] = { 8350 DeviceID, PointerNum, BasePointersArrayArg, 8351 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 8352 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), 8353 OffloadingArgs); 8354 }; 8355 8356 // If we need device pointer privatization, we need to emit the body of the 8357 // region with no privatization in the 'else' branch of the conditional. 8358 // Otherwise, we don't have to do anything. 8359 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 8360 PrePostActionTy &) { 8361 if (!Info.CaptureDeviceAddrMap.empty()) { 8362 CodeGen.setAction(NoPrivAction); 8363 CodeGen(CGF); 8364 } 8365 }; 8366 8367 // We don't have to do anything to close the region if the if clause evaluates 8368 // to false. 8369 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 8370 8371 if (IfCond) { 8372 emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 8373 } else { 8374 RegionCodeGenTy RCG(BeginThenGen); 8375 RCG(CGF); 8376 } 8377 8378 // If we don't require privatization of device pointers, we emit the body in 8379 // between the runtime calls. This avoids duplicating the body code. 
8380 if (Info.CaptureDeviceAddrMap.empty()) { 8381 CodeGen.setAction(NoPrivAction); 8382 CodeGen(CGF); 8383 } 8384 8385 if (IfCond) { 8386 emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen); 8387 } else { 8388 RegionCodeGenTy RCG(EndThenGen); 8389 RCG(CGF); 8390 } 8391 } 8392 8393 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 8394 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 8395 const Expr *Device) { 8396 if (!CGF.HaveInsertPoint()) 8397 return; 8398 8399 assert((isa<OMPTargetEnterDataDirective>(D) || 8400 isa<OMPTargetExitDataDirective>(D) || 8401 isa<OMPTargetUpdateDirective>(D)) && 8402 "Expecting either target enter, exit data, or update directives."); 8403 8404 CodeGenFunction::OMPTargetDataInfo InputInfo; 8405 llvm::Value *MapTypesArray = nullptr; 8406 // Generate the code for the opening of the data environment. 8407 auto &&ThenGen = [this, &D, Device, &InputInfo, 8408 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 8409 // Emit device ID if any. 8410 llvm::Value *DeviceID = nullptr; 8411 if (Device) { 8412 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 8413 CGF.Int64Ty, /*isSigned=*/true); 8414 } else { 8415 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 8416 } 8417 8418 // Emit the number of elements in the offloading arrays. 8419 llvm::Constant *PointerNum = 8420 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 8421 8422 llvm::Value *OffloadingArgs[] = {DeviceID, 8423 PointerNum, 8424 InputInfo.BasePointersArray.getPointer(), 8425 InputInfo.PointersArray.getPointer(), 8426 InputInfo.SizesArray.getPointer(), 8427 MapTypesArray}; 8428 8429 // Select the right runtime function call for each expected standalone 8430 // directive. 8431 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 8432 OpenMPRTLFunction RTLFn; 8433 switch (D.getDirectiveKind()) { 8434 case OMPD_target_enter_data: 8435 RTLFn = HasNowait ? 
OMPRTL__tgt_target_data_begin_nowait 8436 : OMPRTL__tgt_target_data_begin; 8437 break; 8438 case OMPD_target_exit_data: 8439 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait 8440 : OMPRTL__tgt_target_data_end; 8441 break; 8442 case OMPD_target_update: 8443 RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait 8444 : OMPRTL__tgt_target_data_update; 8445 break; 8446 case OMPD_parallel: 8447 case OMPD_for: 8448 case OMPD_parallel_for: 8449 case OMPD_parallel_sections: 8450 case OMPD_for_simd: 8451 case OMPD_parallel_for_simd: 8452 case OMPD_cancel: 8453 case OMPD_cancellation_point: 8454 case OMPD_ordered: 8455 case OMPD_threadprivate: 8456 case OMPD_task: 8457 case OMPD_simd: 8458 case OMPD_sections: 8459 case OMPD_section: 8460 case OMPD_single: 8461 case OMPD_master: 8462 case OMPD_critical: 8463 case OMPD_taskyield: 8464 case OMPD_barrier: 8465 case OMPD_taskwait: 8466 case OMPD_taskgroup: 8467 case OMPD_atomic: 8468 case OMPD_flush: 8469 case OMPD_teams: 8470 case OMPD_target_data: 8471 case OMPD_distribute: 8472 case OMPD_distribute_simd: 8473 case OMPD_distribute_parallel_for: 8474 case OMPD_distribute_parallel_for_simd: 8475 case OMPD_teams_distribute: 8476 case OMPD_teams_distribute_simd: 8477 case OMPD_teams_distribute_parallel_for: 8478 case OMPD_teams_distribute_parallel_for_simd: 8479 case OMPD_declare_simd: 8480 case OMPD_declare_target: 8481 case OMPD_end_declare_target: 8482 case OMPD_declare_reduction: 8483 case OMPD_taskloop: 8484 case OMPD_taskloop_simd: 8485 case OMPD_target: 8486 case OMPD_target_simd: 8487 case OMPD_target_teams_distribute: 8488 case OMPD_target_teams_distribute_simd: 8489 case OMPD_target_teams_distribute_parallel_for: 8490 case OMPD_target_teams_distribute_parallel_for_simd: 8491 case OMPD_target_teams: 8492 case OMPD_target_parallel: 8493 case OMPD_target_parallel_for: 8494 case OMPD_target_parallel_for_simd: 8495 case OMPD_unknown: 8496 llvm_unreachable("Unexpected standalone target data directive."); 8497 break; 
8498 } 8499 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); 8500 }; 8501 8502 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 8503 CodeGenFunction &CGF, PrePostActionTy &) { 8504 // Fill up the arrays with all the mapped variables. 8505 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 8506 MappableExprsHandler::MapValuesArrayTy Pointers; 8507 MappableExprsHandler::MapValuesArrayTy Sizes; 8508 MappableExprsHandler::MapFlagsArrayTy MapTypes; 8509 8510 // Get map clause information. 8511 MappableExprsHandler MEHandler(D, CGF); 8512 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 8513 8514 TargetDataInfo Info; 8515 // Fill up the arrays and create the arguments. 8516 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 8517 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 8518 Info.PointersArray, Info.SizesArray, 8519 Info.MapTypesArray, Info); 8520 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 8521 InputInfo.BasePointersArray = 8522 Address(Info.BasePointersArray, CGM.getPointerAlign()); 8523 InputInfo.PointersArray = 8524 Address(Info.PointersArray, CGM.getPointerAlign()); 8525 InputInfo.SizesArray = 8526 Address(Info.SizesArray, CGM.getPointerAlign()); 8527 MapTypesArray = Info.MapTypesArray; 8528 if (D.hasClausesOfKind<OMPDependClause>()) 8529 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 8530 else 8531 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 8532 }; 8533 8534 if (IfCond) { 8535 emitOMPIfClause(CGF, IfCond, TargetThenGen, 8536 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 8537 } else { 8538 RegionCodeGenTy ThenRCG(TargetThenGen); 8539 ThenRCG(CGF); 8540 } 8541 } 8542 8543 namespace { 8544 /// Kind of parameter in a function with 'declare simd' directive. 8545 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 8546 /// Attribute set of the parameter. 
8547 struct ParamAttrTy { 8548 ParamKindTy Kind = Vector; 8549 llvm::APSInt StrideOrArg; 8550 llvm::APSInt Alignment; 8551 }; 8552 } // namespace 8553 8554 static unsigned evaluateCDTSize(const FunctionDecl *FD, 8555 ArrayRef<ParamAttrTy> ParamAttrs) { 8556 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 8557 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 8558 // of that clause. The VLEN value must be power of 2. 8559 // In other case the notion of the function`s "characteristic data type" (CDT) 8560 // is used to compute the vector length. 8561 // CDT is defined in the following order: 8562 // a) For non-void function, the CDT is the return type. 8563 // b) If the function has any non-uniform, non-linear parameters, then the 8564 // CDT is the type of the first such parameter. 8565 // c) If the CDT determined by a) or b) above is struct, union, or class 8566 // type which is pass-by-value (except for the type that maps to the 8567 // built-in complex data type), the characteristic data type is int. 8568 // d) If none of the above three cases is applicable, the CDT is int. 8569 // The VLEN is then determined based on the CDT and the size of vector 8570 // register of that ISA for which current vector version is generated. The 8571 // VLEN is computed using the formula below: 8572 // VLEN = sizeof(vector_register) / sizeof(CDT), 8573 // where vector register size specified in section 3.2.1 Registers and the 8574 // Stack Frame of original AMD64 ABI document. 
8575 QualType RetType = FD->getReturnType(); 8576 if (RetType.isNull()) 8577 return 0; 8578 ASTContext &C = FD->getASTContext(); 8579 QualType CDT; 8580 if (!RetType.isNull() && !RetType->isVoidType()) { 8581 CDT = RetType; 8582 } else { 8583 unsigned Offset = 0; 8584 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 8585 if (ParamAttrs[Offset].Kind == Vector) 8586 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 8587 ++Offset; 8588 } 8589 if (CDT.isNull()) { 8590 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 8591 if (ParamAttrs[I + Offset].Kind == Vector) { 8592 CDT = FD->getParamDecl(I)->getType(); 8593 break; 8594 } 8595 } 8596 } 8597 } 8598 if (CDT.isNull()) 8599 CDT = C.IntTy; 8600 CDT = CDT->getCanonicalTypeUnqualified(); 8601 if (CDT->isRecordType() || CDT->isUnionType()) 8602 CDT = C.IntTy; 8603 return C.getTypeSize(CDT); 8604 } 8605 8606 static void 8607 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 8608 const llvm::APSInt &VLENVal, 8609 ArrayRef<ParamAttrTy> ParamAttrs, 8610 OMPDeclareSimdDeclAttr::BranchStateTy State) { 8611 struct ISADataTy { 8612 char ISA; 8613 unsigned VecRegSize; 8614 }; 8615 ISADataTy ISAData[] = { 8616 { 8617 'b', 128 8618 }, // SSE 8619 { 8620 'c', 256 8621 }, // AVX 8622 { 8623 'd', 256 8624 }, // AVX2 8625 { 8626 'e', 512 8627 }, // AVX512 8628 }; 8629 llvm::SmallVector<char, 2> Masked; 8630 switch (State) { 8631 case OMPDeclareSimdDeclAttr::BS_Undefined: 8632 Masked.push_back('N'); 8633 Masked.push_back('M'); 8634 break; 8635 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 8636 Masked.push_back('N'); 8637 break; 8638 case OMPDeclareSimdDeclAttr::BS_Inbranch: 8639 Masked.push_back('M'); 8640 break; 8641 } 8642 for (char Mask : Masked) { 8643 for (const ISADataTy &Data : ISAData) { 8644 SmallString<256> Buffer; 8645 llvm::raw_svector_ostream Out(Buffer); 8646 Out << "_ZGV" << Data.ISA << Mask; 8647 if (!VLENVal) { 8648 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / 8649 
evaluateCDTSize(FD, ParamAttrs)); 8650 } else { 8651 Out << VLENVal; 8652 } 8653 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 8654 switch (ParamAttr.Kind){ 8655 case LinearWithVarStride: 8656 Out << 's' << ParamAttr.StrideOrArg; 8657 break; 8658 case Linear: 8659 Out << 'l'; 8660 if (!!ParamAttr.StrideOrArg) 8661 Out << ParamAttr.StrideOrArg; 8662 break; 8663 case Uniform: 8664 Out << 'u'; 8665 break; 8666 case Vector: 8667 Out << 'v'; 8668 break; 8669 } 8670 if (!!ParamAttr.Alignment) 8671 Out << 'a' << ParamAttr.Alignment; 8672 } 8673 Out << '_' << Fn->getName(); 8674 Fn->addFnAttr(Out.str()); 8675 } 8676 } 8677 } 8678 8679 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 8680 llvm::Function *Fn) { 8681 ASTContext &C = CGM.getContext(); 8682 FD = FD->getMostRecentDecl(); 8683 // Map params to their positions in function decl. 8684 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 8685 if (isa<CXXMethodDecl>(FD)) 8686 ParamPositions.try_emplace(FD, 0); 8687 unsigned ParamPos = ParamPositions.size(); 8688 for (const ParmVarDecl *P : FD->parameters()) { 8689 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 8690 ++ParamPos; 8691 } 8692 while (FD) { 8693 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 8694 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 8695 // Mark uniform parameters. 8696 for (const Expr *E : Attr->uniforms()) { 8697 E = E->IgnoreParenImpCasts(); 8698 unsigned Pos; 8699 if (isa<CXXThisExpr>(E)) { 8700 Pos = ParamPositions[FD]; 8701 } else { 8702 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 8703 ->getCanonicalDecl(); 8704 Pos = ParamPositions[PVD]; 8705 } 8706 ParamAttrs[Pos].Kind = Uniform; 8707 } 8708 // Get alignment info. 
8709 auto NI = Attr->alignments_begin(); 8710 for (const Expr *E : Attr->aligneds()) { 8711 E = E->IgnoreParenImpCasts(); 8712 unsigned Pos; 8713 QualType ParmTy; 8714 if (isa<CXXThisExpr>(E)) { 8715 Pos = ParamPositions[FD]; 8716 ParmTy = E->getType(); 8717 } else { 8718 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 8719 ->getCanonicalDecl(); 8720 Pos = ParamPositions[PVD]; 8721 ParmTy = PVD->getType(); 8722 } 8723 ParamAttrs[Pos].Alignment = 8724 (*NI) 8725 ? (*NI)->EvaluateKnownConstInt(C) 8726 : llvm::APSInt::getUnsigned( 8727 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 8728 .getQuantity()); 8729 ++NI; 8730 } 8731 // Mark linear parameters. 8732 auto SI = Attr->steps_begin(); 8733 auto MI = Attr->modifiers_begin(); 8734 for (const Expr *E : Attr->linears()) { 8735 E = E->IgnoreParenImpCasts(); 8736 unsigned Pos; 8737 if (isa<CXXThisExpr>(E)) { 8738 Pos = ParamPositions[FD]; 8739 } else { 8740 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 8741 ->getCanonicalDecl(); 8742 Pos = ParamPositions[PVD]; 8743 } 8744 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 8745 ParamAttr.Kind = Linear; 8746 if (*SI) { 8747 if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C, 8748 Expr::SE_AllowSideEffects)) { 8749 if (const auto *DRE = 8750 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 8751 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 8752 ParamAttr.Kind = LinearWithVarStride; 8753 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 8754 ParamPositions[StridePVD->getCanonicalDecl()]); 8755 } 8756 } 8757 } 8758 } 8759 ++SI; 8760 ++MI; 8761 } 8762 llvm::APSInt VLENVal; 8763 if (const Expr *VLEN = Attr->getSimdlen()) 8764 VLENVal = VLEN->EvaluateKnownConstInt(C); 8765 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 8766 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 8767 CGM.getTriple().getArch() == llvm::Triple::x86_64) 8768 emitX86DeclareSimdFunction(FD, Fn, VLENVal, 
ParamAttrs, State); 8769 } 8770 FD = FD->getPreviousDecl(); 8771 } 8772 } 8773 8774 namespace { 8775 /// Cleanup action for doacross support. 8776 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 8777 public: 8778 static const int DoacrossFinArgs = 2; 8779 8780 private: 8781 llvm::Value *RTLFn; 8782 llvm::Value *Args[DoacrossFinArgs]; 8783 8784 public: 8785 DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs) 8786 : RTLFn(RTLFn) { 8787 assert(CallArgs.size() == DoacrossFinArgs); 8788 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 8789 } 8790 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 8791 if (!CGF.HaveInsertPoint()) 8792 return; 8793 CGF.EmitRuntimeCall(RTLFn, Args); 8794 } 8795 }; 8796 } // namespace 8797 8798 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 8799 const OMPLoopDirective &D, 8800 ArrayRef<Expr *> NumIterations) { 8801 if (!CGF.HaveInsertPoint()) 8802 return; 8803 8804 ASTContext &C = CGM.getContext(); 8805 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 8806 RecordDecl *RD; 8807 if (KmpDimTy.isNull()) { 8808 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 8809 // kmp_int64 lo; // lower 8810 // kmp_int64 up; // upper 8811 // kmp_int64 st; // stride 8812 // }; 8813 RD = C.buildImplicitRecord("kmp_dim"); 8814 RD->startDefinition(); 8815 addFieldToRecordDecl(C, RD, Int64Ty); 8816 addFieldToRecordDecl(C, RD, Int64Ty); 8817 addFieldToRecordDecl(C, RD, Int64Ty); 8818 RD->completeDefinition(); 8819 KmpDimTy = C.getRecordType(RD); 8820 } else { 8821 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 8822 } 8823 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 8824 QualType ArrayTy = 8825 C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0); 8826 8827 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 8828 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 8829 enum { LowerFD = 0, UpperFD, StrideFD }; 8830 // Fill dims 
with data. 8831 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 8832 LValue DimsLVal = 8833 CGF.MakeAddrLValue(CGF.Builder.CreateConstArrayGEP( 8834 DimsAddr, I, C.getTypeSizeInChars(KmpDimTy)), 8835 KmpDimTy); 8836 // dims.upper = num_iterations; 8837 LValue UpperLVal = CGF.EmitLValueForField( 8838 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 8839 llvm::Value *NumIterVal = 8840 CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]), 8841 D.getNumIterations()->getType(), Int64Ty, 8842 D.getNumIterations()->getExprLoc()); 8843 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 8844 // dims.stride = 1; 8845 LValue StrideLVal = CGF.EmitLValueForField( 8846 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 8847 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 8848 StrideLVal); 8849 } 8850 8851 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 8852 // kmp_int32 num_dims, struct kmp_dim * dims); 8853 llvm::Value *Args[] = { 8854 emitUpdateLocation(CGF, D.getBeginLoc()), 8855 getThreadID(CGF, D.getBeginLoc()), 8856 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 8857 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8858 CGF.Builder 8859 .CreateConstArrayGEP(DimsAddr, 0, C.getTypeSizeInChars(KmpDimTy)) 8860 .getPointer(), 8861 CGM.VoidPtrTy)}; 8862 8863 llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init); 8864 CGF.EmitRuntimeCall(RTLFn, Args); 8865 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 8866 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 8867 llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 8868 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 8869 llvm::makeArrayRef(FiniArgs)); 8870 } 8871 8872 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 8873 const OMPDependClause *C) { 8874 QualType Int64Ty = 8875 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 8876 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 8877 QualType ArrayTy = CGM.getContext().getConstantArrayType( 8878 Int64Ty, Size, ArrayType::Normal, 0); 8879 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 8880 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 8881 const Expr *CounterVal = C->getLoopData(I); 8882 assert(CounterVal); 8883 llvm::Value *CntVal = CGF.EmitScalarConversion( 8884 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 8885 CounterVal->getExprLoc()); 8886 CGF.EmitStoreOfScalar( 8887 CntVal, 8888 CGF.Builder.CreateConstArrayGEP( 8889 CntAddr, I, CGM.getContext().getTypeSizeInChars(Int64Ty)), 8890 /*Volatile=*/false, Int64Ty); 8891 } 8892 llvm::Value *Args[] = { 8893 emitUpdateLocation(CGF, C->getBeginLoc()), 8894 getThreadID(CGF, C->getBeginLoc()), 8895 CGF.Builder 8896 .CreateConstArrayGEP(CntAddr, 0, 8897 CGM.getContext().getTypeSizeInChars(Int64Ty)) 8898 .getPointer()}; 8899 llvm::Value *RTLFn; 8900 if (C->getDependencyKind() == OMPC_DEPEND_source) { 8901 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 8902 } else { 8903 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 8904 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 8905 } 8906 CGF.EmitRuntimeCall(RTLFn, Args); 8907 } 8908 8909 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 8910 llvm::Value *Callee, 8911 ArrayRef<llvm::Value *> Args) const { 8912 assert(Loc.isValid() && "Outlined function call location must be valid."); 8913 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 8914 8915 if (auto *Fn = dyn_cast<llvm::Function>(Callee)) { 8916 if (Fn->doesNotThrow()) { 8917 CGF.EmitNounwindRuntimeCall(Fn, Args); 8918 return; 8919 } 8920 } 8921 CGF.EmitRuntimeCall(Callee, Args); 8922 } 8923 8924 void CGOpenMPRuntime::emitOutlinedFunctionCall( 8925 CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, 8926 
ArrayRef<llvm::Value *> Args) const { 8927 emitCall(CGF, Loc, OutlinedFn, Args); 8928 } 8929 8930 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 8931 const VarDecl *NativeParam, 8932 const VarDecl *TargetParam) const { 8933 return CGF.GetAddrOfLocalVar(NativeParam); 8934 } 8935 8936 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 8937 const VarDecl *VD) { 8938 return Address::invalid(); 8939 } 8940 8941 llvm::Value *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 8942 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 8943 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 8944 llvm_unreachable("Not supported in SIMD-only mode"); 8945 } 8946 8947 llvm::Value *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 8948 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 8949 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 8950 llvm_unreachable("Not supported in SIMD-only mode"); 8951 } 8952 8953 llvm::Value *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 8954 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 8955 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 8956 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 8957 bool Tied, unsigned &NumberOfParts) { 8958 llvm_unreachable("Not supported in SIMD-only mode"); 8959 } 8960 8961 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 8962 SourceLocation Loc, 8963 llvm::Value *OutlinedFn, 8964 ArrayRef<llvm::Value *> CapturedVars, 8965 const Expr *IfCond) { 8966 llvm_unreachable("Not supported in SIMD-only mode"); 8967 } 8968 8969 void CGOpenMPSIMDRuntime::emitCriticalRegion( 8970 CodeGenFunction &CGF, StringRef CriticalName, 8971 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 8972 const Expr *Hint) { 8973 llvm_unreachable("Not supported in SIMD-only mode"); 8974 } 8975 8976 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 8977 const RegionCodeGenTy 
&MasterOpGen, 8978 SourceLocation Loc) { 8979 llvm_unreachable("Not supported in SIMD-only mode"); 8980 } 8981 8982 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 8983 SourceLocation Loc) { 8984 llvm_unreachable("Not supported in SIMD-only mode"); 8985 } 8986 8987 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 8988 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 8989 SourceLocation Loc) { 8990 llvm_unreachable("Not supported in SIMD-only mode"); 8991 } 8992 8993 void CGOpenMPSIMDRuntime::emitSingleRegion( 8994 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 8995 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 8996 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 8997 ArrayRef<const Expr *> AssignmentOps) { 8998 llvm_unreachable("Not supported in SIMD-only mode"); 8999 } 9000 9001 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 9002 const RegionCodeGenTy &OrderedOpGen, 9003 SourceLocation Loc, 9004 bool IsThreads) { 9005 llvm_unreachable("Not supported in SIMD-only mode"); 9006 } 9007 9008 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 9009 SourceLocation Loc, 9010 OpenMPDirectiveKind Kind, 9011 bool EmitChecks, 9012 bool ForceSimpleCall) { 9013 llvm_unreachable("Not supported in SIMD-only mode"); 9014 } 9015 9016 void CGOpenMPSIMDRuntime::emitForDispatchInit( 9017 CodeGenFunction &CGF, SourceLocation Loc, 9018 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 9019 bool Ordered, const DispatchRTInput &DispatchValues) { 9020 llvm_unreachable("Not supported in SIMD-only mode"); 9021 } 9022 9023 void CGOpenMPSIMDRuntime::emitForStaticInit( 9024 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 9025 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 9026 llvm_unreachable("Not supported in SIMD-only mode"); 9027 } 9028 9029 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 9030 CodeGenFunction &CGF, 
SourceLocation Loc, 9031 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 9032 llvm_unreachable("Not supported in SIMD-only mode"); 9033 } 9034 9035 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 9036 SourceLocation Loc, 9037 unsigned IVSize, 9038 bool IVSigned) { 9039 llvm_unreachable("Not supported in SIMD-only mode"); 9040 } 9041 9042 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 9043 SourceLocation Loc, 9044 OpenMPDirectiveKind DKind) { 9045 llvm_unreachable("Not supported in SIMD-only mode"); 9046 } 9047 9048 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 9049 SourceLocation Loc, 9050 unsigned IVSize, bool IVSigned, 9051 Address IL, Address LB, 9052 Address UB, Address ST) { 9053 llvm_unreachable("Not supported in SIMD-only mode"); 9054 } 9055 9056 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 9057 llvm::Value *NumThreads, 9058 SourceLocation Loc) { 9059 llvm_unreachable("Not supported in SIMD-only mode"); 9060 } 9061 9062 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 9063 OpenMPProcBindClauseKind ProcBind, 9064 SourceLocation Loc) { 9065 llvm_unreachable("Not supported in SIMD-only mode"); 9066 } 9067 9068 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 9069 const VarDecl *VD, 9070 Address VDAddr, 9071 SourceLocation Loc) { 9072 llvm_unreachable("Not supported in SIMD-only mode"); 9073 } 9074 9075 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 9076 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 9077 CodeGenFunction *CGF) { 9078 llvm_unreachable("Not supported in SIMD-only mode"); 9079 } 9080 9081 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 9082 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 9083 llvm_unreachable("Not supported in SIMD-only mode"); 9084 } 9085 9086 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 
9087 ArrayRef<const Expr *> Vars, 9088 SourceLocation Loc) { 9089 llvm_unreachable("Not supported in SIMD-only mode"); 9090 } 9091 9092 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 9093 const OMPExecutableDirective &D, 9094 llvm::Value *TaskFunction, 9095 QualType SharedsTy, Address Shareds, 9096 const Expr *IfCond, 9097 const OMPTaskDataTy &Data) { 9098 llvm_unreachable("Not supported in SIMD-only mode"); 9099 } 9100 9101 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 9102 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 9103 llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, 9104 const Expr *IfCond, const OMPTaskDataTy &Data) { 9105 llvm_unreachable("Not supported in SIMD-only mode"); 9106 } 9107 9108 void CGOpenMPSIMDRuntime::emitReduction( 9109 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 9110 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 9111 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 9112 assert(Options.SimpleReduction && "Only simple reduction is expected."); 9113 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 9114 ReductionOps, Options); 9115 } 9116 9117 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 9118 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 9119 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 9120 llvm_unreachable("Not supported in SIMD-only mode"); 9121 } 9122 9123 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 9124 SourceLocation Loc, 9125 ReductionCodeGen &RCG, 9126 unsigned N) { 9127 llvm_unreachable("Not supported in SIMD-only mode"); 9128 } 9129 9130 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 9131 SourceLocation Loc, 9132 llvm::Value *ReductionsPtr, 9133 LValue SharedLVal) { 9134 llvm_unreachable("Not supported in SIMD-only mode"); 9135 } 9136 9137 void 
CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 9138 SourceLocation Loc) { 9139 llvm_unreachable("Not supported in SIMD-only mode"); 9140 } 9141 9142 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 9143 CodeGenFunction &CGF, SourceLocation Loc, 9144 OpenMPDirectiveKind CancelRegion) { 9145 llvm_unreachable("Not supported in SIMD-only mode"); 9146 } 9147 9148 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 9149 SourceLocation Loc, const Expr *IfCond, 9150 OpenMPDirectiveKind CancelRegion) { 9151 llvm_unreachable("Not supported in SIMD-only mode"); 9152 } 9153 9154 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 9155 const OMPExecutableDirective &D, StringRef ParentName, 9156 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 9157 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 9158 llvm_unreachable("Not supported in SIMD-only mode"); 9159 } 9160 9161 void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF, 9162 const OMPExecutableDirective &D, 9163 llvm::Value *OutlinedFn, 9164 llvm::Value *OutlinedFnID, 9165 const Expr *IfCond, const Expr *Device) { 9166 llvm_unreachable("Not supported in SIMD-only mode"); 9167 } 9168 9169 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 9170 llvm_unreachable("Not supported in SIMD-only mode"); 9171 } 9172 9173 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 9174 llvm_unreachable("Not supported in SIMD-only mode"); 9175 } 9176 9177 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 9178 return false; 9179 } 9180 9181 llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() { 9182 return nullptr; 9183 } 9184 9185 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 9186 const OMPExecutableDirective &D, 9187 SourceLocation Loc, 9188 llvm::Value *OutlinedFn, 9189 ArrayRef<llvm::Value *> CapturedVars) { 9190 llvm_unreachable("Not supported in SIMD-only mode"); 9191 } 9192 9193 void 
CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 9194 const Expr *NumTeams, 9195 const Expr *ThreadLimit, 9196 SourceLocation Loc) { 9197 llvm_unreachable("Not supported in SIMD-only mode"); 9198 } 9199 9200 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 9201 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 9202 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 9203 llvm_unreachable("Not supported in SIMD-only mode"); 9204 } 9205 9206 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 9207 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 9208 const Expr *Device) { 9209 llvm_unreachable("Not supported in SIMD-only mode"); 9210 } 9211 9212 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 9213 const OMPLoopDirective &D, 9214 ArrayRef<Expr *> NumIterations) { 9215 llvm_unreachable("Not supported in SIMD-only mode"); 9216 } 9217 9218 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 9219 const OMPDependClause *C) { 9220 llvm_unreachable("Not supported in SIMD-only mode"); 9221 } 9222 9223 const VarDecl * 9224 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 9225 const VarDecl *NativeParam) const { 9226 llvm_unreachable("Not supported in SIMD-only mode"); 9227 } 9228 9229 Address 9230 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 9231 const VarDecl *NativeParam, 9232 const VarDecl *TargetParam) const { 9233 llvm_unreachable("Not supported in SIMD-only mode"); 9234 } 9235 9236