1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGCXXABI.h" 14 #include "CGCleanup.h" 15 #include "CGOpenMPRuntime.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "clang/CodeGen/ConstantInitBuilder.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/StmtOpenMP.h" 21 #include "clang/Basic/BitmaskEnum.h" 22 #include "llvm/ADT/ArrayRef.h" 23 #include "llvm/Bitcode/BitcodeReader.h" 24 #include "llvm/IR/DerivedTypes.h" 25 #include "llvm/IR/GlobalValue.h" 26 #include "llvm/IR/Value.h" 27 #include "llvm/Support/Format.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <cassert> 30 31 using namespace clang; 32 using namespace CodeGen; 33 34 namespace { 35 /// Base class for handling code generation inside OpenMP regions. 36 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 37 public: 38 /// Kinds of OpenMP regions used in codegen. 39 enum CGOpenMPRegionKind { 40 /// Region with outlined function for standalone 'parallel' 41 /// directive. 42 ParallelOutlinedRegion, 43 /// Region with outlined function for standalone 'task' directive. 44 TaskOutlinedRegion, 45 /// Region for constructs that do not require function outlining, 46 /// like 'for', 'sections', 'atomic' etc. directives. 47 InlinedRegion, 48 /// Region with outlined function for standalone 'target' directive. 49 TargetRegion, 50 }; 51 52 CGOpenMPRegionInfo(const CapturedStmt &CS, 53 const CGOpenMPRegionKind RegionKind, 54 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 55 bool HasCancel) 56 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 57 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 58 59 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 60 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 61 bool HasCancel) 62 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 63 Kind(Kind), HasCancel(HasCancel) {} 64 65 /// Get a variable or parameter for storing global thread id 66 /// inside OpenMP construct. 67 virtual const VarDecl *getThreadIDVariable() const = 0; 68 69 /// Emit the captured statement body. 70 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 71 72 /// Get an LValue for the current ThreadID variable. 73 /// \return LValue for thread id variable. This LValue always has type int32*. 74 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 75 76 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 77 78 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 79 80 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 81 82 bool hasCancel() const { return HasCancel; } 83 84 static bool classof(const CGCapturedStmtInfo *Info) { 85 return Info->getKind() == CR_OpenMP; 86 } 87 88 ~CGOpenMPRegionInfo() override = default; 89 90 protected: 91 CGOpenMPRegionKind RegionKind; 92 RegionCodeGenTy CodeGen; 93 OpenMPDirectiveKind Kind; 94 bool HasCancel; 95 }; 96 97 /// API for captured statement code generation in OpenMP constructs. 98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 99 public: 100 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 101 const RegionCodeGenTy &CodeGen, 102 OpenMPDirectiveKind Kind, bool HasCancel, 103 StringRef HelperName) 104 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 105 HasCancel), 106 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 107 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 108 } 109 110 /// Get a variable or parameter for storing global thread id 111 /// inside OpenMP construct. 112 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 113 114 /// Get the name of the capture helper. 115 StringRef getHelperName() const override { return HelperName; } 116 117 static bool classof(const CGCapturedStmtInfo *Info) { 118 return CGOpenMPRegionInfo::classof(Info) && 119 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 120 ParallelOutlinedRegion; 121 } 122 123 private: 124 /// A variable or parameter storing global thread id for OpenMP 125 /// constructs. 126 const VarDecl *ThreadIDVar; 127 StringRef HelperName; 128 }; 129 130 /// API for captured statement code generation in OpenMP constructs. 131 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 132 public: 133 class UntiedTaskActionTy final : public PrePostActionTy { 134 bool Untied; 135 const VarDecl *PartIDVar; 136 const RegionCodeGenTy UntiedCodeGen; 137 llvm::SwitchInst *UntiedSwitch = nullptr; 138 139 public: 140 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 141 const RegionCodeGenTy &UntiedCodeGen) 142 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 143 void Enter(CodeGenFunction &CGF) override { 144 if (Untied) { 145 // Emit task switching point. 146 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 147 CGF.GetAddrOfLocalVar(PartIDVar), 148 PartIDVar->getType()->castAs<PointerType>()); 149 llvm::Value *Res = 150 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 151 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 152 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 153 CGF.EmitBlock(DoneBB); 154 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 155 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 156 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 157 CGF.Builder.GetInsertBlock()); 158 emitUntiedSwitch(CGF); 159 } 160 } 161 void emitUntiedSwitch(CodeGenFunction &CGF) const { 162 if (Untied) { 163 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 164 CGF.GetAddrOfLocalVar(PartIDVar), 165 PartIDVar->getType()->castAs<PointerType>()); 166 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 167 PartIdLVal); 168 UntiedCodeGen(CGF); 169 CodeGenFunction::JumpDest CurPoint = 170 CGF.getJumpDestInCurrentScope(".untied.next."); 171 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 172 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 173 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 174 CGF.Builder.GetInsertBlock()); 175 CGF.EmitBranchThroughCleanup(CurPoint); 176 CGF.EmitBlock(CurPoint.getBlock()); 177 } 178 } 179 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } 180 }; 181 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 182 const VarDecl *ThreadIDVar, 183 const RegionCodeGenTy &CodeGen, 184 OpenMPDirectiveKind Kind, bool HasCancel, 185 const UntiedTaskActionTy &Action) 186 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 187 ThreadIDVar(ThreadIDVar), Action(Action) { 188 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 189 } 190 191 /// Get a variable or parameter for storing global thread id 192 /// inside OpenMP construct. 193 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 194 195 /// Get an LValue for the current ThreadID variable. 196 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 197 198 /// Get the name of the capture helper. 199 StringRef getHelperName() const override { return ".omp_outlined."; } 200 201 void emitUntiedSwitch(CodeGenFunction &CGF) override { 202 Action.emitUntiedSwitch(CGF); 203 } 204 205 static bool classof(const CGCapturedStmtInfo *Info) { 206 return CGOpenMPRegionInfo::classof(Info) && 207 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 208 TaskOutlinedRegion; 209 } 210 211 private: 212 /// A variable or parameter storing global thread id for OpenMP 213 /// constructs. 214 const VarDecl *ThreadIDVar; 215 /// Action for emitting code for untied tasks. 216 const UntiedTaskActionTy &Action; 217 }; 218 219 /// API for inlined captured statement code generation in OpenMP 220 /// constructs. 221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 222 public: 223 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 224 const RegionCodeGenTy &CodeGen, 225 OpenMPDirectiveKind Kind, bool HasCancel) 226 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 227 OldCSI(OldCSI), 228 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 229 230 // Retrieve the value of the context parameter. 231 llvm::Value *getContextValue() const override { 232 if (OuterRegionInfo) 233 return OuterRegionInfo->getContextValue(); 234 llvm_unreachable("No context value for inlined OpenMP region"); 235 } 236 237 void setContextValue(llvm::Value *V) override { 238 if (OuterRegionInfo) { 239 OuterRegionInfo->setContextValue(V); 240 return; 241 } 242 llvm_unreachable("No context value for inlined OpenMP region"); 243 } 244 245 /// Lookup the captured field decl for a variable. 246 const FieldDecl *lookup(const VarDecl *VD) const override { 247 if (OuterRegionInfo) 248 return OuterRegionInfo->lookup(VD); 249 // If there is no outer outlined region,no need to lookup in a list of 250 // captured variables, we can use the original one. 251 return nullptr; 252 } 253 254 FieldDecl *getThisFieldDecl() const override { 255 if (OuterRegionInfo) 256 return OuterRegionInfo->getThisFieldDecl(); 257 return nullptr; 258 } 259 260 /// Get a variable or parameter for storing global thread id 261 /// inside OpenMP construct. 262 const VarDecl *getThreadIDVariable() const override { 263 if (OuterRegionInfo) 264 return OuterRegionInfo->getThreadIDVariable(); 265 return nullptr; 266 } 267 268 /// Get an LValue for the current ThreadID variable. 269 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 270 if (OuterRegionInfo) 271 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 272 llvm_unreachable("No LValue for inlined OpenMP construct"); 273 } 274 275 /// Get the name of the capture helper. 276 StringRef getHelperName() const override { 277 if (auto *OuterRegionInfo = getOldCSI()) 278 return OuterRegionInfo->getHelperName(); 279 llvm_unreachable("No helper name for inlined OpenMP construct"); 280 } 281 282 void emitUntiedSwitch(CodeGenFunction &CGF) override { 283 if (OuterRegionInfo) 284 OuterRegionInfo->emitUntiedSwitch(CGF); 285 } 286 287 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 288 289 static bool classof(const CGCapturedStmtInfo *Info) { 290 return CGOpenMPRegionInfo::classof(Info) && 291 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 292 } 293 294 ~CGOpenMPInlinedRegionInfo() override = default; 295 296 private: 297 /// CodeGen info about outer OpenMP region. 298 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 299 CGOpenMPRegionInfo *OuterRegionInfo; 300 }; 301 302 /// API for captured statement code generation in OpenMP target 303 /// constructs. For this captures, implicit parameters are used instead of the 304 /// captured fields. The name of the target region has to be unique in a given 305 /// application so it is provided by the client, because only the client has 306 /// the information to generate that. 307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 308 public: 309 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 310 const RegionCodeGenTy &CodeGen, StringRef HelperName) 311 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 312 /*HasCancel=*/false), 313 HelperName(HelperName) {} 314 315 /// This is unused for target regions because each starts executing 316 /// with a single thread. 317 const VarDecl *getThreadIDVariable() const override { return nullptr; } 318 319 /// Get the name of the capture helper. 320 StringRef getHelperName() const override { return HelperName; } 321 322 static bool classof(const CGCapturedStmtInfo *Info) { 323 return CGOpenMPRegionInfo::classof(Info) && 324 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 325 } 326 327 private: 328 StringRef HelperName; 329 }; 330 331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 332 llvm_unreachable("No codegen for expressions"); 333 } 334 /// API for generation of expressions captured in a innermost OpenMP 335 /// region. 336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 337 public: 338 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 339 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 340 OMPD_unknown, 341 /*HasCancel=*/false), 342 PrivScope(CGF) { 343 // Make sure the globals captured in the provided statement are local by 344 // using the privatization logic. We assume the same variable is not 345 // captured more than once. 346 for (const auto &C : CS.captures()) { 347 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 348 continue; 349 350 const VarDecl *VD = C.getCapturedVar(); 351 if (VD->isLocalVarDeclOrParm()) 352 continue; 353 354 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 355 /*RefersToEnclosingVariableOrCapture=*/false, 356 VD->getType().getNonReferenceType(), VK_LValue, 357 C.getLocation()); 358 PrivScope.addPrivate( 359 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); }); 360 } 361 (void)PrivScope.Privatize(); 362 } 363 364 /// Lookup the captured field decl for a variable. 365 const FieldDecl *lookup(const VarDecl *VD) const override { 366 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 367 return FD; 368 return nullptr; 369 } 370 371 /// Emit the captured statement body. 372 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 373 llvm_unreachable("No body for expressions"); 374 } 375 376 /// Get a variable or parameter for storing global thread id 377 /// inside OpenMP construct. 378 const VarDecl *getThreadIDVariable() const override { 379 llvm_unreachable("No thread id for expressions"); 380 } 381 382 /// Get the name of the capture helper. 383 StringRef getHelperName() const override { 384 llvm_unreachable("No helper name for expressions"); 385 } 386 387 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 388 389 private: 390 /// Private scope to capture global variables. 391 CodeGenFunction::OMPPrivateScope PrivScope; 392 }; 393 394 /// RAII for emitting code of OpenMP constructs. 395 class InlinedOpenMPRegionRAII { 396 CodeGenFunction &CGF; 397 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 398 FieldDecl *LambdaThisCaptureField = nullptr; 399 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 400 401 public: 402 /// Constructs region for combined constructs. 403 /// \param CodeGen Code generation sequence for combined directives. Includes 404 /// a list of functions used for code generation of implicitly inlined 405 /// regions. 406 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 407 OpenMPDirectiveKind Kind, bool HasCancel) 408 : CGF(CGF) { 409 // Start emission for the construct. 410 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 411 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 412 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 413 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 414 CGF.LambdaThisCaptureField = nullptr; 415 BlockInfo = CGF.BlockInfo; 416 CGF.BlockInfo = nullptr; 417 } 418 419 ~InlinedOpenMPRegionRAII() { 420 // Restore original CapturedStmtInfo only if we're done with code emission. 421 auto *OldCSI = 422 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 423 delete CGF.CapturedStmtInfo; 424 CGF.CapturedStmtInfo = OldCSI; 425 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 426 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 427 CGF.BlockInfo = BlockInfo; 428 } 429 }; 430 431 /// Values for bit flags used in the ident_t to describe the fields. 432 /// All enumeric elements are named and described in accordance with the code 433 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 434 enum OpenMPLocationFlags : unsigned { 435 /// Use trampoline for internal microtask. 436 OMP_IDENT_IMD = 0x01, 437 /// Use c-style ident structure. 438 OMP_IDENT_KMPC = 0x02, 439 /// Atomic reduction option for kmpc_reduce. 440 OMP_ATOMIC_REDUCE = 0x10, 441 /// Explicit 'barrier' directive. 442 OMP_IDENT_BARRIER_EXPL = 0x20, 443 /// Implicit barrier in code. 444 OMP_IDENT_BARRIER_IMPL = 0x40, 445 /// Implicit barrier in 'for' directive. 446 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 447 /// Implicit barrier in 'sections' directive. 448 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 449 /// Implicit barrier in 'single' directive. 450 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 451 /// Call of __kmp_for_static_init for static loop. 452 OMP_IDENT_WORK_LOOP = 0x200, 453 /// Call of __kmp_for_static_init for sections. 454 OMP_IDENT_WORK_SECTIONS = 0x400, 455 /// Call of __kmp_for_static_init for distribute. 456 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 457 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 458 }; 459 460 /// Describes ident structure that describes a source location. 461 /// All descriptions are taken from 462 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 463 /// Original structure: 464 /// typedef struct ident { 465 /// kmp_int32 reserved_1; /**< might be used in Fortran; 466 /// see above */ 467 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 468 /// KMP_IDENT_KMPC identifies this union 469 /// member */ 470 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 471 /// see above */ 472 ///#if USE_ITT_BUILD 473 /// /* but currently used for storing 474 /// region-specific ITT */ 475 /// /* contextual information. */ 476 ///#endif /* USE_ITT_BUILD */ 477 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 478 /// C++ */ 479 /// char const *psource; /**< String describing the source location. 480 /// The string is composed of semi-colon separated 481 // fields which describe the source file, 482 /// the function and a pair of line numbers that 483 /// delimit the construct. 484 /// */ 485 /// } ident_t; 486 enum IdentFieldIndex { 487 /// might be used in Fortran 488 IdentField_Reserved_1, 489 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 490 IdentField_Flags, 491 /// Not really used in Fortran any more 492 IdentField_Reserved_2, 493 /// Source[4] in Fortran, do not use for C++ 494 IdentField_Reserved_3, 495 /// String describing the source location. The string is composed of 496 /// semi-colon separated fields which describe the source file, the function 497 /// and a pair of line numbers that delimit the construct. 498 IdentField_PSource 499 }; 500 501 /// Schedule types for 'omp for' loops (these enumerators are taken from 502 /// the enum sched_type in kmp.h). 503 enum OpenMPSchedType { 504 /// Lower bound for default (unordered) versions. 505 OMP_sch_lower = 32, 506 OMP_sch_static_chunked = 33, 507 OMP_sch_static = 34, 508 OMP_sch_dynamic_chunked = 35, 509 OMP_sch_guided_chunked = 36, 510 OMP_sch_runtime = 37, 511 OMP_sch_auto = 38, 512 /// static with chunk adjustment (e.g., simd) 513 OMP_sch_static_balanced_chunked = 45, 514 /// Lower bound for 'ordered' versions. 515 OMP_ord_lower = 64, 516 OMP_ord_static_chunked = 65, 517 OMP_ord_static = 66, 518 OMP_ord_dynamic_chunked = 67, 519 OMP_ord_guided_chunked = 68, 520 OMP_ord_runtime = 69, 521 OMP_ord_auto = 70, 522 OMP_sch_default = OMP_sch_static, 523 /// dist_schedule types 524 OMP_dist_sch_static_chunked = 91, 525 OMP_dist_sch_static = 92, 526 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 527 /// Set if the monotonic schedule modifier was present. 528 OMP_sch_modifier_monotonic = (1 << 29), 529 /// Set if the nonmonotonic schedule modifier was present. 530 OMP_sch_modifier_nonmonotonic = (1 << 30), 531 }; 532 533 enum OpenMPRTLFunction { 534 /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, 535 /// kmpc_micro microtask, ...); 536 OMPRTL__kmpc_fork_call, 537 /// Call to void *__kmpc_threadprivate_cached(ident_t *loc, 538 /// kmp_int32 global_tid, void *data, size_t size, void ***cache); 539 OMPRTL__kmpc_threadprivate_cached, 540 /// Call to void __kmpc_threadprivate_register( ident_t *, 541 /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 542 OMPRTL__kmpc_threadprivate_register, 543 // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); 544 OMPRTL__kmpc_global_thread_num, 545 // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 546 // kmp_critical_name *crit); 547 OMPRTL__kmpc_critical, 548 // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 549 // global_tid, kmp_critical_name *crit, uintptr_t hint); 550 OMPRTL__kmpc_critical_with_hint, 551 // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 552 // kmp_critical_name *crit); 553 OMPRTL__kmpc_end_critical, 554 // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 555 // global_tid); 556 OMPRTL__kmpc_cancel_barrier, 557 // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 558 OMPRTL__kmpc_barrier, 559 // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 560 OMPRTL__kmpc_for_static_fini, 561 // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 562 // global_tid); 563 OMPRTL__kmpc_serialized_parallel, 564 // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 565 // global_tid); 566 OMPRTL__kmpc_end_serialized_parallel, 567 // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 568 // kmp_int32 num_threads); 569 OMPRTL__kmpc_push_num_threads, 570 // Call to void __kmpc_flush(ident_t *loc); 571 OMPRTL__kmpc_flush, 572 // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid); 573 OMPRTL__kmpc_master, 574 // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); 575 OMPRTL__kmpc_end_master, 576 // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 577 // int end_part); 578 OMPRTL__kmpc_omp_taskyield, 579 // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); 580 OMPRTL__kmpc_single, 581 // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); 582 OMPRTL__kmpc_end_single, 583 // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 584 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 585 // kmp_routine_entry_t *task_entry); 586 OMPRTL__kmpc_omp_task_alloc, 587 // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * 588 // new_task); 589 OMPRTL__kmpc_omp_task, 590 // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 591 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 592 // kmp_int32 didit); 593 OMPRTL__kmpc_copyprivate, 594 // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 595 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 596 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 597 OMPRTL__kmpc_reduce, 598 // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 599 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 600 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 601 // *lck); 602 OMPRTL__kmpc_reduce_nowait, 603 // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 604 // kmp_critical_name *lck); 605 OMPRTL__kmpc_end_reduce, 606 // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 607 // kmp_critical_name *lck); 608 OMPRTL__kmpc_end_reduce_nowait, 609 // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 610 // kmp_task_t * new_task); 611 OMPRTL__kmpc_omp_task_begin_if0, 612 // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 613 // kmp_task_t * new_task); 614 OMPRTL__kmpc_omp_task_complete_if0, 615 // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 616 OMPRTL__kmpc_ordered, 617 // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 618 OMPRTL__kmpc_end_ordered, 619 // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 620 // global_tid); 621 OMPRTL__kmpc_omp_taskwait, 622 // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 623 OMPRTL__kmpc_taskgroup, 624 // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 625 OMPRTL__kmpc_end_taskgroup, 626 // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 627 // int proc_bind); 628 OMPRTL__kmpc_push_proc_bind, 629 // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 630 // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t 631 // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 632 OMPRTL__kmpc_omp_task_with_deps, 633 // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 634 // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 635 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 636 OMPRTL__kmpc_omp_wait_deps, 637 // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 638 // global_tid, kmp_int32 cncl_kind); 639 OMPRTL__kmpc_cancellationpoint, 640 // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 641 // kmp_int32 cncl_kind); 642 OMPRTL__kmpc_cancel, 643 // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 644 // kmp_int32 num_teams, kmp_int32 thread_limit); 645 OMPRTL__kmpc_push_num_teams, 646 // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 647 // microtask, ...); 648 OMPRTL__kmpc_fork_teams, 649 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 650 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 651 // sched, kmp_uint64 grainsize, void *task_dup); 652 OMPRTL__kmpc_taskloop, 653 // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 654 // num_dims, struct kmp_dim *dims); 655 OMPRTL__kmpc_doacross_init, 656 // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 657 OMPRTL__kmpc_doacross_fini, 658 // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 659 // *vec); 660 OMPRTL__kmpc_doacross_post, 661 // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 662 // *vec); 663 OMPRTL__kmpc_doacross_wait, 664 // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void 665 // *data); 666 OMPRTL__kmpc_task_reduction_init, 667 // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 668 // *d); 669 OMPRTL__kmpc_task_reduction_get_th_data, 670 // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al); 671 OMPRTL__kmpc_alloc, 672 // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al); 673 OMPRTL__kmpc_free, 674 675 // 676 // Offloading related calls 677 // 678 // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 679 // size); 680 OMPRTL__kmpc_push_target_tripcount, 681 // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 682 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 683 // *arg_types); 684 OMPRTL__tgt_target, 685 // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 686 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 687 // *arg_types); 688 OMPRTL__tgt_target_nowait, 689 // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 690 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 691 // *arg_types, int32_t num_teams, int32_t thread_limit); 692 OMPRTL__tgt_target_teams, 693 // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void 694 // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t 695 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 696 OMPRTL__tgt_target_teams_nowait, 697 // Call to void __tgt_register_lib(__tgt_bin_desc *desc); 698 OMPRTL__tgt_register_lib, 699 // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); 700 OMPRTL__tgt_unregister_lib, 701 // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 702 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 703 OMPRTL__tgt_target_data_begin, 704 // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 705 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 706 // *arg_types); 707 OMPRTL__tgt_target_data_begin_nowait, 708 // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 709 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 710 OMPRTL__tgt_target_data_end, 711 // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t 712 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 713 // *arg_types); 714 OMPRTL__tgt_target_data_end_nowait, 715 // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 716 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 717 OMPRTL__tgt_target_data_update, 718 // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t 719 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 720 // *arg_types); 721 OMPRTL__tgt_target_data_update_nowait, 722 }; 723 724 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 725 /// region. 726 class CleanupTy final : public EHScopeStack::Cleanup { 727 PrePostActionTy *Action; 728 729 public: 730 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 731 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 732 if (!CGF.HaveInsertPoint()) 733 return; 734 Action->Exit(CGF); 735 } 736 }; 737 738 } // anonymous namespace 739 740 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 741 CodeGenFunction::RunCleanupsScope Scope(CGF); 742 if (PrePostAction) { 743 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 744 Callback(CodeGen, CGF, *PrePostAction); 745 } else { 746 PrePostActionTy Action; 747 Callback(CodeGen, CGF, Action); 748 } 749 } 750 751 /// Check if the combiner is a call to UDR combiner and if it is so return the 752 /// UDR decl used for reduction. 753 static const OMPDeclareReductionDecl * 754 getReductionInit(const Expr *ReductionOp) { 755 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 756 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 757 if (const auto *DRE = 758 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 759 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 760 return DRD; 761 return nullptr; 762 } 763 764 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 765 const OMPDeclareReductionDecl *DRD, 766 const Expr *InitOp, 767 Address Private, Address Original, 768 QualType Ty) { 769 if (DRD->getInitializer()) { 770 std::pair<llvm::Function *, llvm::Function *> Reduction = 771 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 772 const auto *CE = cast<CallExpr>(InitOp); 773 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 774 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 775 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 776 const auto *LHSDRE = 777 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 778 const auto *RHSDRE = 779 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 780 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 781 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 782 [=]() { return Private; }); 783 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 784 [=]() { return Original; }); 785 (void)PrivateScope.Privatize(); 786 RValue Func = RValue::get(Reduction.second); 787 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 788 CGF.EmitIgnoredExpr(InitOp); 789 } else { 790 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 791 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 792 auto *GV = new llvm::GlobalVariable( 793 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 794 llvm::GlobalValue::PrivateLinkage, Init, Name); 795 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 796 RValue InitRVal; 797 switch (CGF.getEvaluationKind(Ty)) { 798 case TEK_Scalar: 799 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 800 break; 801 case TEK_Complex: 802 InitRVal = 803 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 804 break; 805 case TEK_Aggregate: 806 InitRVal = RValue::getAggregate(LV.getAddress()); 807 break; 808 } 809 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 810 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 811 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 812 /*IsInitializer=*/false); 813 } 814 } 815 816 /// Emit initialization of arrays of complex types. 817 /// \param DestAddr Address of the array. 818 /// \param Type Type of array. 819 /// \param Init Initial expression of array. 820 /// \param SrcAddr Address of the original array. 821 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 822 QualType Type, bool EmitDeclareReductionInit, 823 const Expr *Init, 824 const OMPDeclareReductionDecl *DRD, 825 Address SrcAddr = Address::invalid()) { 826 // Perform element-by-element initialization. 827 QualType ElementTy; 828 829 // Drill down to the base element type on both arrays. 830 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 831 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 832 DestAddr = 833 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 834 if (DRD) 835 SrcAddr = 836 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 837 838 llvm::Value *SrcBegin = nullptr; 839 if (DRD) 840 SrcBegin = SrcAddr.getPointer(); 841 llvm::Value *DestBegin = DestAddr.getPointer(); 842 // Cast from pointer to array type to pointer to single element. 843 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 844 // The basic structure here is a while-do loop. 845 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 846 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 847 llvm::Value *IsEmpty = 848 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 849 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 850 851 // Enter the loop body, making that address the current address. 852 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 853 CGF.EmitBlock(BodyBB); 854 855 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 856 857 llvm::PHINode *SrcElementPHI = nullptr; 858 Address SrcElementCurrent = Address::invalid(); 859 if (DRD) { 860 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 861 "omp.arraycpy.srcElementPast"); 862 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 863 SrcElementCurrent = 864 Address(SrcElementPHI, 865 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 866 } 867 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 868 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 869 DestElementPHI->addIncoming(DestBegin, EntryBB); 870 Address DestElementCurrent = 871 Address(DestElementPHI, 872 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 873 874 // Emit copy. 875 { 876 CodeGenFunction::RunCleanupsScope InitScope(CGF); 877 if (EmitDeclareReductionInit) { 878 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 879 SrcElementCurrent, ElementTy); 880 } else 881 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 882 /*IsInitializer=*/false); 883 } 884 885 if (DRD) { 886 // Shift the address forward by one element. 887 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 888 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 889 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 890 } 891 892 // Shift the address forward by one element. 893 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 894 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 895 // Check whether we've reached the end. 896 llvm::Value *Done = 897 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 898 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 899 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 900 901 // Done. 902 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 903 } 904 905 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 906 return CGF.EmitOMPSharedLValue(E); 907 } 908 909 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 910 const Expr *E) { 911 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 912 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 913 return LValue(); 914 } 915 916 void ReductionCodeGen::emitAggregateInitialization( 917 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 918 const OMPDeclareReductionDecl *DRD) { 919 // Emit VarDecl with copy init for arrays. 920 // Get the address of the original variable captured in current 921 // captured region. 922 const auto *PrivateVD = 923 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 924 bool EmitDeclareReductionInit = 925 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 926 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 927 EmitDeclareReductionInit, 928 EmitDeclareReductionInit ? ClausesData[N].ReductionOp 929 : PrivateVD->getInit(), 930 DRD, SharedLVal.getAddress()); 931 } 932 933 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 934 ArrayRef<const Expr *> Privates, 935 ArrayRef<const Expr *> ReductionOps) { 936 ClausesData.reserve(Shareds.size()); 937 SharedAddresses.reserve(Shareds.size()); 938 Sizes.reserve(Shareds.size()); 939 BaseDecls.reserve(Shareds.size()); 940 auto IPriv = Privates.begin(); 941 auto IRed = ReductionOps.begin(); 942 for (const Expr *Ref : Shareds) { 943 ClausesData.emplace_back(Ref, *IPriv, *IRed); 944 std::advance(IPriv, 1); 945 std::advance(IRed, 1); 946 } 947 } 948 949 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { 950 assert(SharedAddresses.size() == N && 951 "Number of generated lvalues must be exactly N."); 952 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 953 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 954 SharedAddresses.emplace_back(First, Second); 955 } 956 957 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 958 const auto *PrivateVD = 959 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 960 QualType PrivateType = PrivateVD->getType(); 961 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 962 if (!PrivateType->isVariablyModifiedType()) { 963 Sizes.emplace_back( 964 CGF.getTypeSize( 965 SharedAddresses[N].first.getType().getNonReferenceType()), 966 nullptr); 967 return; 968 } 969 llvm::Value *Size; 970 llvm::Value *SizeInChars; 971 auto *ElemType = 972 cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType()) 973 ->getElementType(); 974 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 975 if (AsArraySection) { 976 Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(), 977 SharedAddresses[N].first.getPointer()); 978 Size = CGF.Builder.CreateNUWAdd( 979 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 980 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 981 } else { 982 SizeInChars = CGF.getTypeSize( 983 SharedAddresses[N].first.getType().getNonReferenceType()); 984 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 985 } 986 Sizes.emplace_back(SizeInChars, Size); 987 CodeGenFunction::OpaqueValueMapping OpaqueMap( 988 CGF, 989 cast<OpaqueValueExpr>( 990 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 991 RValue::get(Size)); 992 CGF.EmitVariablyModifiedType(PrivateType); 993 } 994 995 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 996 llvm::Value *Size) { 997 const auto *PrivateVD = 998 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 999 QualType PrivateType = PrivateVD->getType(); 1000 if (!PrivateType->isVariablyModifiedType()) { 1001 assert(!Size && !Sizes[N].second && 1002 "Size should be nullptr for non-variably modified reduction " 1003 "items."); 1004 return; 1005 } 1006 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1007 CGF, 1008 cast<OpaqueValueExpr>( 1009 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1010 RValue::get(Size)); 1011 CGF.EmitVariablyModifiedType(PrivateType); 1012 } 1013 1014 void ReductionCodeGen::emitInitialization( 1015 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 1016 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 1017 assert(SharedAddresses.size() > N && "No variable was generated"); 1018 const auto *PrivateVD = 1019 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1020 const OMPDeclareReductionDecl *DRD = 1021 getReductionInit(ClausesData[N].ReductionOp); 1022 QualType PrivateType = PrivateVD->getType(); 1023 PrivateAddr = CGF.Builder.CreateElementBitCast( 1024 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1025 QualType SharedType = SharedAddresses[N].first.getType(); 1026 SharedLVal = CGF.MakeAddrLValue( 1027 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(), 1028 CGF.ConvertTypeForMem(SharedType)), 1029 SharedType, SharedAddresses[N].first.getBaseInfo(), 1030 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 1031 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 1032 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 1033 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 1034 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 1035 PrivateAddr, SharedLVal.getAddress(), 1036 SharedLVal.getType()); 1037 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 1038 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 1039 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 1040 PrivateVD->getType().getQualifiers(), 1041 /*IsInitializer=*/false); 1042 } 1043 } 1044 1045 bool ReductionCodeGen::needCleanups(unsigned N) { 1046 const auto *PrivateVD = 1047 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1048 QualType PrivateType = PrivateVD->getType(); 1049 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1050 return DTorKind != QualType::DK_none; 1051 } 1052 1053 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 1054 Address PrivateAddr) { 1055 const auto *PrivateVD = 1056 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1057 QualType PrivateType = PrivateVD->getType(); 1058 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1059 if (needCleanups(N)) { 1060 PrivateAddr = CGF.Builder.CreateElementBitCast( 1061 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1062 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 1063 } 1064 } 1065 1066 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1067 LValue BaseLV) { 1068 BaseTy = BaseTy.getNonReferenceType(); 1069 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1070 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1071 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 1072 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); 1073 } else { 1074 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy); 1075 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 1076 } 1077 BaseTy = BaseTy->getPointeeType(); 1078 } 1079 return CGF.MakeAddrLValue( 1080 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(), 1081 CGF.ConvertTypeForMem(ElTy)), 1082 BaseLV.getType(), BaseLV.getBaseInfo(), 1083 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 1084 } 1085 1086 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1087 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 1088 llvm::Value *Addr) { 1089 Address Tmp = Address::invalid(); 1090 Address TopTmp = Address::invalid(); 1091 Address MostTopTmp = Address::invalid(); 1092 BaseTy = BaseTy.getNonReferenceType(); 1093 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1094 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1095 Tmp = CGF.CreateMemTemp(BaseTy); 1096 if (TopTmp.isValid()) 1097 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 1098 else 1099 MostTopTmp = Tmp; 1100 TopTmp = Tmp; 1101 BaseTy = BaseTy->getPointeeType(); 1102 } 1103 llvm::Type *Ty = BaseLVType; 1104 if (Tmp.isValid()) 1105 Ty = Tmp.getElementType(); 1106 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 1107 if (Tmp.isValid()) { 1108 CGF.Builder.CreateStore(Addr, Tmp); 1109 return MostTopTmp; 1110 } 1111 return Address(Addr, BaseLVAlignment); 1112 } 1113 1114 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 1115 const VarDecl *OrigVD = nullptr; 1116 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 1117 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 1118 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 1119 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 1120 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1121 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1122 DE = cast<DeclRefExpr>(Base); 1123 OrigVD = cast<VarDecl>(DE->getDecl()); 1124 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 1125 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 1126 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1127 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1128 DE = cast<DeclRefExpr>(Base); 1129 OrigVD = cast<VarDecl>(DE->getDecl()); 1130 } 1131 return OrigVD; 1132 } 1133 1134 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1135 Address PrivateAddr) { 1136 const DeclRefExpr *DE; 1137 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1138 BaseDecls.emplace_back(OrigVD); 1139 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1140 LValue BaseLValue = 1141 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1142 OriginalBaseLValue); 1143 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1144 BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); 1145 llvm::Value *PrivatePointer = 1146 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1147 PrivateAddr.getPointer(), 1148 SharedAddresses[N].first.getAddress().getType()); 1149 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1150 return castToBase(CGF, OrigVD->getType(), 1151 SharedAddresses[N].first.getType(), 1152 OriginalBaseLValue.getAddress().getType(), 1153 OriginalBaseLValue.getAlignment(), Ptr); 1154 } 1155 BaseDecls.emplace_back( 1156 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1157 return PrivateAddr; 1158 } 1159 1160 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1161 const OMPDeclareReductionDecl *DRD = 1162 getReductionInit(ClausesData[N].ReductionOp); 1163 return DRD && DRD->getInitializer(); 1164 } 1165 1166 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1167 return CGF.EmitLoadOfPointerLValue( 1168 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1169 getThreadIDVariable()->getType()->castAs<PointerType>()); 1170 } 1171 1172 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1173 if (!CGF.HaveInsertPoint()) 1174 return; 1175 // 1.2.2 OpenMP Language Terminology 1176 // Structured block - An executable statement with a single entry at the 1177 // top and a single exit at the bottom. 1178 // The point of exit cannot be a branch out of the structured block. 1179 // longjmp() and throw() must not violate the entry/exit criteria. 1180 CGF.EHStack.pushTerminate(); 1181 CodeGen(CGF); 1182 CGF.EHStack.popTerminate(); 1183 } 1184 1185 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1186 CodeGenFunction &CGF) { 1187 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1188 getThreadIDVariable()->getType(), 1189 AlignmentSource::Decl); 1190 } 1191 1192 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1193 QualType FieldTy) { 1194 auto *Field = FieldDecl::Create( 1195 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1196 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1197 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1198 Field->setAccess(AS_public); 1199 DC->addDecl(Field); 1200 return Field; 1201 } 1202 1203 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1204 StringRef Separator) 1205 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1206 OffloadEntriesInfoManager(CGM) { 1207 ASTContext &C = CGM.getContext(); 1208 RecordDecl *RD = C.buildImplicitRecord("ident_t"); 1209 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1210 RD->startDefinition(); 1211 // reserved_1 1212 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1213 // flags 1214 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1215 // reserved_2 1216 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1217 // reserved_3 1218 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1219 // psource 1220 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1221 RD->completeDefinition(); 1222 IdentQTy = C.getRecordType(RD); 1223 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); 1224 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1225 1226 loadOffloadInfoMetadata(); 1227 } 1228 1229 void CGOpenMPRuntime::clear() { 1230 InternalVars.clear(); 1231 // Clean non-target variable declarations possibly used only in debug info. 1232 for (const auto &Data : EmittedNonTargetVariables) { 1233 if (!Data.getValue().pointsToAliveValue()) 1234 continue; 1235 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1236 if (!GV) 1237 continue; 1238 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1239 continue; 1240 GV->eraseFromParent(); 1241 } 1242 } 1243 1244 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1245 SmallString<128> Buffer; 1246 llvm::raw_svector_ostream OS(Buffer); 1247 StringRef Sep = FirstSeparator; 1248 for (StringRef Part : Parts) { 1249 OS << Sep << Part; 1250 Sep = Separator; 1251 } 1252 return OS.str(); 1253 } 1254 1255 static llvm::Function * 1256 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1257 const Expr *CombinerInitializer, const VarDecl *In, 1258 const VarDecl *Out, bool IsCombiner) { 1259 // void .omp_combiner.(Ty *in, Ty *out); 1260 ASTContext &C = CGM.getContext(); 1261 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1262 FunctionArgList Args; 1263 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1264 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1265 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1266 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1267 Args.push_back(&OmpOutParm); 1268 Args.push_back(&OmpInParm); 1269 const CGFunctionInfo &FnInfo = 1270 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1271 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1272 std::string Name = CGM.getOpenMPRuntime().getName( 1273 {IsCombiner ? "omp_combiner" : "omp_initializer", ""}); 1274 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1275 Name, &CGM.getModule()); 1276 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1277 Fn->removeFnAttr(llvm::Attribute::NoInline); 1278 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1279 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1280 CodeGenFunction CGF(CGM); 1281 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1282 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1283 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1284 Out->getLocation()); 1285 CodeGenFunction::OMPPrivateScope Scope(CGF); 1286 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1287 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1288 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1289 .getAddress(); 1290 }); 1291 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1292 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1293 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1294 .getAddress(); 1295 }); 1296 (void)Scope.Privatize(); 1297 if (!IsCombiner && Out->hasInit() && 1298 !CGF.isTrivialInitializer(Out->getInit())) { 1299 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1300 Out->getType().getQualifiers(), 1301 /*IsInitializer=*/true); 1302 } 1303 if (CombinerInitializer) 1304 CGF.EmitIgnoredExpr(CombinerInitializer); 1305 Scope.ForceCleanup(); 1306 CGF.FinishFunction(); 1307 return Fn; 1308 } 1309 1310 void CGOpenMPRuntime::emitUserDefinedReduction( 1311 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1312 if (UDRMap.count(D) > 0) 1313 return; 1314 llvm::Function *Combiner = emitCombinerOrInitializer( 1315 CGM, D->getType(), D->getCombiner(), 1316 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1317 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1318 /*IsCombiner=*/true); 1319 llvm::Function *Initializer = nullptr; 1320 if (const Expr *Init = D->getInitializer()) { 1321 Initializer = emitCombinerOrInitializer( 1322 CGM, D->getType(), 1323 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1324 : nullptr, 1325 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1326 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1327 /*IsCombiner=*/false); 1328 } 1329 UDRMap.try_emplace(D, Combiner, Initializer); 1330 if (CGF) { 1331 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1332 Decls.second.push_back(D); 1333 } 1334 } 1335 1336 std::pair<llvm::Function *, llvm::Function *> 1337 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1338 auto I = UDRMap.find(D); 1339 if (I != UDRMap.end()) 1340 return I->second; 1341 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1342 return UDRMap.lookup(D); 1343 } 1344 1345 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1346 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1347 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1348 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1349 assert(ThreadIDVar->getType()->isPointerType() && 1350 "thread id variable must be of type kmp_int32 *"); 1351 CodeGenFunction CGF(CGM, true); 1352 bool HasCancel = false; 1353 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1354 HasCancel = OPD->hasCancel(); 1355 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1356 HasCancel = OPSD->hasCancel(); 1357 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1358 HasCancel = OPFD->hasCancel(); 1359 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1360 HasCancel = OPFD->hasCancel(); 1361 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1362 HasCancel = OPFD->hasCancel(); 1363 else if (const auto *OPFD = 1364 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1365 HasCancel = OPFD->hasCancel(); 1366 else if (const auto *OPFD = 1367 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1368 HasCancel = OPFD->hasCancel(); 1369 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1370 HasCancel, OutlinedHelperName); 1371 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1372 return CGF.GenerateOpenMPCapturedStmtFunction(*CS); 1373 } 1374 1375 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1376 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1377 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1378 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1379 return emitParallelOrTeamsOutlinedFunction( 1380 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1381 } 1382 1383 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1384 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1385 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1386 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1387 return emitParallelOrTeamsOutlinedFunction( 1388 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1389 } 1390 1391 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1392 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1393 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1394 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1395 bool Tied, unsigned &NumberOfParts) { 1396 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1397 PrePostActionTy &) { 1398 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1399 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1400 llvm::Value *TaskArgs[] = { 1401 UpLoc, ThreadID, 1402 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1403 TaskTVar->getType()->castAs<PointerType>()) 1404 .getPointer()}; 1405 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1406 }; 1407 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1408 UntiedCodeGen); 1409 CodeGen.setAction(Action); 1410 assert(!ThreadIDVar->getType()->isPointerType() && 1411 "thread id variable must be of type kmp_int32 for tasks"); 1412 const OpenMPDirectiveKind Region = 1413 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1414 : OMPD_task; 1415 const CapturedStmt *CS = D.getCapturedStmt(Region); 1416 const auto *TD = dyn_cast<OMPTaskDirective>(&D); 1417 CodeGenFunction CGF(CGM, true); 1418 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1419 InnermostKind, 1420 TD ? TD->hasCancel() : false, Action); 1421 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1422 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1423 if (!Tied) 1424 NumberOfParts = Action.getNumberOfParts(); 1425 return Res; 1426 } 1427 1428 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1429 const RecordDecl *RD, const CGRecordLayout &RL, 1430 ArrayRef<llvm::Constant *> Data) { 1431 llvm::StructType *StructTy = RL.getLLVMType(); 1432 unsigned PrevIdx = 0; 1433 ConstantInitBuilder CIBuilder(CGM); 1434 auto DI = Data.begin(); 1435 for (const FieldDecl *FD : RD->fields()) { 1436 unsigned Idx = RL.getLLVMFieldNo(FD); 1437 // Fill the alignment. 1438 for (unsigned I = PrevIdx; I < Idx; ++I) 1439 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1440 PrevIdx = Idx + 1; 1441 Fields.add(*DI); 1442 ++DI; 1443 } 1444 } 1445 1446 template <class... As> 1447 static llvm::GlobalVariable * 1448 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1449 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1450 As &&... Args) { 1451 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1452 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1453 ConstantInitBuilder CIBuilder(CGM); 1454 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1455 buildStructValue(Fields, CGM, RD, RL, Data); 1456 return Fields.finishAndCreateGlobal( 1457 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1458 std::forward<As>(Args)...); 1459 } 1460 1461 template <typename T> 1462 static void 1463 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1464 ArrayRef<llvm::Constant *> Data, 1465 T &Parent) { 1466 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1467 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1468 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1469 buildStructValue(Fields, CGM, RD, RL, Data); 1470 Fields.finishAndAddTo(Parent); 1471 } 1472 1473 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1474 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1475 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1476 FlagsTy FlagsKey(Flags, Reserved2Flags); 1477 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); 1478 if (!Entry) { 1479 if (!DefaultOpenMPPSource) { 1480 // Initialize default location for psource field of ident_t structure of 1481 // all ident_t objects. Format is ";file;function;line;column;;". 1482 // Taken from 1483 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp 1484 DefaultOpenMPPSource = 1485 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 1486 DefaultOpenMPPSource = 1487 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 1488 } 1489 1490 llvm::Constant *Data[] = { 1491 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1492 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 1493 llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), 1494 llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; 1495 llvm::GlobalValue *DefaultOpenMPLocation = 1496 createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "", 1497 llvm::GlobalValue::PrivateLinkage); 1498 DefaultOpenMPLocation->setUnnamedAddr( 1499 llvm::GlobalValue::UnnamedAddr::Global); 1500 1501 OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation; 1502 } 1503 return Address(Entry, Align); 1504 } 1505 1506 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1507 bool AtCurrentPoint) { 1508 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1509 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1510 1511 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 1512 if (AtCurrentPoint) { 1513 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1514 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1515 } else { 1516 Elem.second.ServiceInsertPt = 1517 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1518 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1519 } 1520 } 1521 1522 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1523 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1524 if (Elem.second.ServiceInsertPt) { 1525 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1526 Elem.second.ServiceInsertPt = nullptr; 1527 Ptr->eraseFromParent(); 1528 } 1529 } 1530 1531 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1532 SourceLocation Loc, 1533 unsigned Flags) { 1534 Flags |= OMP_IDENT_KMPC; 1535 // If no debug info is generated - return global default location. 1536 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1537 Loc.isInvalid()) 1538 return getOrCreateDefaultLocation(Flags).getPointer(); 1539 1540 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1541 1542 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1543 Address LocValue = Address::invalid(); 1544 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1545 if (I != OpenMPLocThreadIDMap.end()) 1546 LocValue = Address(I->second.DebugLoc, Align); 1547 1548 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 1549 // GetOpenMPThreadID was called before this routine. 1550 if (!LocValue.isValid()) { 1551 // Generate "ident_t .kmpc_loc.addr;" 1552 Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr"); 1553 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1554 Elem.second.DebugLoc = AI.getPointer(); 1555 LocValue = AI; 1556 1557 if (!Elem.second.ServiceInsertPt) 1558 setLocThreadIdInsertPt(CGF); 1559 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1560 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1561 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 1562 CGF.getTypeSize(IdentQTy)); 1563 } 1564 1565 // char **psource = &.kmpc_loc_<flags>.addr.psource; 1566 LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy); 1567 auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin(); 1568 LValue PSource = 1569 CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource)); 1570 1571 llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 1572 if (OMPDebugLoc == nullptr) { 1573 SmallString<128> Buffer2; 1574 llvm::raw_svector_ostream OS2(Buffer2); 1575 // Build debug location 1576 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1577 OS2 << ";" << PLoc.getFilename() << ";"; 1578 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1579 OS2 << FD->getQualifiedNameAsString(); 1580 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1581 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 1582 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 1583 } 1584 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 1585 CGF.EmitStoreOfScalar(OMPDebugLoc, PSource); 1586 1587 // Our callers always pass this to a runtime function, so for 1588 // convenience, go ahead and return a naked pointer. 1589 return LocValue.getPointer(); 1590 } 1591 1592 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1593 SourceLocation Loc) { 1594 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1595 1596 llvm::Value *ThreadID = nullptr; 1597 // Check whether we've already cached a load of the thread id in this 1598 // function. 1599 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1600 if (I != OpenMPLocThreadIDMap.end()) { 1601 ThreadID = I->second.ThreadID; 1602 if (ThreadID != nullptr) 1603 return ThreadID; 1604 } 1605 // If exceptions are enabled, do not use parameter to avoid possible crash. 1606 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1607 !CGF.getLangOpts().CXXExceptions || 1608 CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 1609 if (auto *OMPRegionInfo = 1610 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1611 if (OMPRegionInfo->getThreadIDVariable()) { 1612 // Check if this an outlined function with thread id passed as argument. 1613 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1614 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1615 // If value loaded in entry block, cache it and use it everywhere in 1616 // function. 1617 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 1618 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1619 Elem.second.ThreadID = ThreadID; 1620 } 1621 return ThreadID; 1622 } 1623 } 1624 } 1625 1626 // This is not an outlined function region - need to call __kmpc_int32 1627 // kmpc_global_thread_num(ident_t *loc). 1628 // Generate thread id value and cache this value for use across the 1629 // function. 1630 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1631 if (!Elem.second.ServiceInsertPt) 1632 setLocThreadIdInsertPt(CGF); 1633 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1634 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1635 llvm::CallInst *Call = CGF.Builder.CreateCall( 1636 createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1637 emitUpdateLocation(CGF, Loc)); 1638 Call->setCallingConv(CGF.getRuntimeCC()); 1639 Elem.second.ThreadID = Call; 1640 return Call; 1641 } 1642 1643 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1644 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1645 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1646 clearLocThreadIdInsertPt(CGF); 1647 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1648 } 1649 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1650 for(auto *D : FunctionUDRMap[CGF.CurFn]) 1651 UDRMap.erase(D); 1652 FunctionUDRMap.erase(CGF.CurFn); 1653 } 1654 } 1655 1656 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1657 return IdentTy->getPointerTo(); 1658 } 1659 1660 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1661 if (!Kmpc_MicroTy) { 1662 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 1663 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1664 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1665 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1666 } 1667 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1668 } 1669 1670 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1671 llvm::FunctionCallee RTLFn = nullptr; 1672 switch (static_cast<OpenMPRTLFunction>(Function)) { 1673 case OMPRTL__kmpc_fork_call: { 1674 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1675 // microtask, ...); 1676 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1677 getKmpc_MicroPointerTy()}; 1678 auto *FnTy = 1679 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1680 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1681 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 1682 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 1683 llvm::LLVMContext &Ctx = F->getContext(); 1684 llvm::MDBuilder MDB(Ctx); 1685 // Annotate the callback behavior of the __kmpc_fork_call: 1686 // - The callback callee is argument number 2 (microtask). 1687 // - The first two arguments of the callback callee are unknown (-1). 1688 // - All variadic arguments to the __kmpc_fork_call are passed to the 1689 // callback callee. 1690 F->addMetadata( 1691 llvm::LLVMContext::MD_callback, 1692 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1693 2, {-1, -1}, 1694 /* VarArgsArePassed */ true)})); 1695 } 1696 } 1697 break; 1698 } 1699 case OMPRTL__kmpc_global_thread_num: { 1700 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1701 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1702 auto *FnTy = 1703 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1704 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1705 break; 1706 } 1707 case OMPRTL__kmpc_threadprivate_cached: { 1708 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1709 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1710 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1711 CGM.VoidPtrTy, CGM.SizeTy, 1712 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1713 auto *FnTy = 1714 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1715 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1716 break; 1717 } 1718 case OMPRTL__kmpc_critical: { 1719 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1720 // kmp_critical_name *crit); 1721 llvm::Type *TypeParams[] = { 1722 getIdentTyPointerTy(), CGM.Int32Ty, 1723 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1724 auto *FnTy = 1725 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1726 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1727 break; 1728 } 1729 case OMPRTL__kmpc_critical_with_hint: { 1730 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1731 // kmp_critical_name *crit, uintptr_t hint); 1732 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1733 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1734 CGM.IntPtrTy}; 1735 auto *FnTy = 1736 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1737 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); 1738 break; 1739 } 1740 case OMPRTL__kmpc_threadprivate_register: { 1741 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1742 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1743 // typedef void *(*kmpc_ctor)(void *); 1744 auto *KmpcCtorTy = 1745 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1746 /*isVarArg*/ false)->getPointerTo(); 1747 // typedef void *(*kmpc_cctor)(void *, void *); 1748 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1749 auto *KmpcCopyCtorTy = 1750 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1751 /*isVarArg*/ false) 1752 ->getPointerTo(); 1753 // typedef void (*kmpc_dtor)(void *); 1754 auto *KmpcDtorTy = 1755 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1756 ->getPointerTo(); 1757 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1758 KmpcCopyCtorTy, KmpcDtorTy}; 1759 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1760 /*isVarArg*/ false); 1761 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1762 break; 1763 } 1764 case OMPRTL__kmpc_end_critical: { 1765 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1766 // kmp_critical_name *crit); 1767 llvm::Type *TypeParams[] = { 1768 getIdentTyPointerTy(), CGM.Int32Ty, 1769 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1770 auto *FnTy = 1771 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1772 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1773 break; 1774 } 1775 case OMPRTL__kmpc_cancel_barrier: { 1776 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1777 // global_tid); 1778 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1779 auto *FnTy = 1780 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1781 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1782 break; 1783 } 1784 case OMPRTL__kmpc_barrier: { 1785 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1786 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1787 auto *FnTy = 1788 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1789 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1790 break; 1791 } 1792 case OMPRTL__kmpc_for_static_fini: { 1793 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1794 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1795 auto *FnTy = 1796 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1797 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1798 break; 1799 } 1800 case OMPRTL__kmpc_push_num_threads: { 1801 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1802 // kmp_int32 num_threads) 1803 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1804 CGM.Int32Ty}; 1805 auto *FnTy = 1806 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1807 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1808 break; 1809 } 1810 case OMPRTL__kmpc_serialized_parallel: { 1811 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1812 // global_tid); 1813 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1814 auto *FnTy = 1815 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1816 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1817 break; 1818 } 1819 case OMPRTL__kmpc_end_serialized_parallel: { 1820 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1821 // global_tid); 1822 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1823 auto *FnTy = 1824 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1825 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1826 break; 1827 } 1828 case OMPRTL__kmpc_flush: { 1829 // Build void __kmpc_flush(ident_t *loc); 1830 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1831 auto *FnTy = 1832 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1833 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1834 break; 1835 } 1836 case OMPRTL__kmpc_master: { 1837 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1838 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1839 auto *FnTy = 1840 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1841 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1842 break; 1843 } 1844 case OMPRTL__kmpc_end_master: { 1845 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1846 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1847 auto *FnTy = 1848 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1849 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1850 break; 1851 } 1852 case OMPRTL__kmpc_omp_taskyield: { 1853 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1854 // int end_part); 1855 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1856 auto *FnTy = 1857 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1858 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1859 break; 1860 } 1861 case OMPRTL__kmpc_single: { 1862 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1863 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1864 auto *FnTy = 1865 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1866 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1867 break; 1868 } 1869 case OMPRTL__kmpc_end_single: { 1870 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1871 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1872 auto *FnTy = 1873 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1874 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1875 break; 1876 } 1877 case OMPRTL__kmpc_omp_task_alloc: { 1878 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1879 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1880 // kmp_routine_entry_t *task_entry); 1881 assert(KmpRoutineEntryPtrTy != nullptr && 1882 "Type kmp_routine_entry_t must be created."); 1883 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1884 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1885 // Return void * and then cast to particular kmp_task_t type. 1886 auto *FnTy = 1887 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1888 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1889 break; 1890 } 1891 case OMPRTL__kmpc_omp_task: { 1892 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1893 // *new_task); 1894 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1895 CGM.VoidPtrTy}; 1896 auto *FnTy = 1897 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1898 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 1899 break; 1900 } 1901 case OMPRTL__kmpc_copyprivate: { 1902 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 1903 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 1904 // kmp_int32 didit); 1905 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1906 auto *CpyFnTy = 1907 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 1908 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 1909 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 1910 CGM.Int32Ty}; 1911 auto *FnTy = 1912 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1913 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 1914 break; 1915 } 1916 case OMPRTL__kmpc_reduce: { 1917 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 1918 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 1919 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 1920 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1921 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1922 /*isVarArg=*/false); 1923 llvm::Type *TypeParams[] = { 1924 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1925 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1926 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1927 auto *FnTy = 1928 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1929 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 1930 break; 1931 } 1932 case OMPRTL__kmpc_reduce_nowait: { 1933 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 1934 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 1935 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 1936 // *lck); 1937 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1938 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1939 /*isVarArg=*/false); 1940 llvm::Type *TypeParams[] = { 1941 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1942 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1943 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1944 auto *FnTy = 1945 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1946 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 1947 break; 1948 } 1949 case OMPRTL__kmpc_end_reduce: { 1950 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 1951 // kmp_critical_name *lck); 1952 llvm::Type *TypeParams[] = { 1953 getIdentTyPointerTy(), CGM.Int32Ty, 1954 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1955 auto *FnTy = 1956 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1957 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 1958 break; 1959 } 1960 case OMPRTL__kmpc_end_reduce_nowait: { 1961 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 1962 // kmp_critical_name *lck); 1963 llvm::Type *TypeParams[] = { 1964 getIdentTyPointerTy(), CGM.Int32Ty, 1965 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1966 auto *FnTy = 1967 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1968 RTLFn = 1969 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 1970 break; 1971 } 1972 case OMPRTL__kmpc_omp_task_begin_if0: { 1973 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1974 // *new_task); 1975 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1976 CGM.VoidPtrTy}; 1977 auto *FnTy = 1978 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1979 RTLFn = 1980 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 1981 break; 1982 } 1983 case OMPRTL__kmpc_omp_task_complete_if0: { 1984 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1985 // *new_task); 1986 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1987 CGM.VoidPtrTy}; 1988 auto *FnTy = 1989 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1990 RTLFn = CGM.CreateRuntimeFunction(FnTy, 1991 /*Name=*/"__kmpc_omp_task_complete_if0"); 1992 break; 1993 } 1994 case OMPRTL__kmpc_ordered: { 1995 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 1996 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1997 auto *FnTy = 1998 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1999 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 2000 break; 2001 } 2002 case OMPRTL__kmpc_end_ordered: { 2003 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 2004 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2005 auto *FnTy = 2006 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2007 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 2008 break; 2009 } 2010 case OMPRTL__kmpc_omp_taskwait: { 2011 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 2012 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2013 auto *FnTy = 2014 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2015 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 2016 break; 2017 } 2018 case OMPRTL__kmpc_taskgroup: { 2019 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 2020 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2021 auto *FnTy = 2022 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2023 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 2024 break; 2025 } 2026 case OMPRTL__kmpc_end_taskgroup: { 2027 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 2028 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2029 auto *FnTy = 2030 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2031 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 2032 break; 2033 } 2034 case OMPRTL__kmpc_push_proc_bind: { 2035 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 2036 // int proc_bind) 2037 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2038 auto *FnTy = 2039 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2040 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 2041 break; 2042 } 2043 case OMPRTL__kmpc_omp_task_with_deps: { 2044 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2045 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2046 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 2047 llvm::Type *TypeParams[] = { 2048 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 2049 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 2050 auto *FnTy = 2051 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2052 RTLFn = 2053 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 2054 break; 2055 } 2056 case OMPRTL__kmpc_omp_wait_deps: { 2057 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2058 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2059 // kmp_depend_info_t *noalias_dep_list); 2060 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2061 CGM.Int32Ty, CGM.VoidPtrTy, 2062 CGM.Int32Ty, CGM.VoidPtrTy}; 2063 auto *FnTy = 2064 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2065 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2066 break; 2067 } 2068 case OMPRTL__kmpc_cancellationpoint: { 2069 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2070 // global_tid, kmp_int32 cncl_kind) 2071 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2072 auto *FnTy = 2073 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2074 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2075 break; 2076 } 2077 case OMPRTL__kmpc_cancel: { 2078 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2079 // kmp_int32 cncl_kind) 2080 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2081 auto *FnTy = 2082 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2083 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2084 break; 2085 } 2086 case OMPRTL__kmpc_push_num_teams: { 2087 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 2088 // kmp_int32 num_teams, kmp_int32 num_threads) 2089 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2090 CGM.Int32Ty}; 2091 auto *FnTy = 2092 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2093 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2094 break; 2095 } 2096 case OMPRTL__kmpc_fork_teams: { 2097 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2098 // microtask, ...); 2099 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2100 getKmpc_MicroPointerTy()}; 2101 auto *FnTy = 2102 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2103 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2104 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 2105 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 2106 llvm::LLVMContext &Ctx = F->getContext(); 2107 llvm::MDBuilder MDB(Ctx); 2108 // Annotate the callback behavior of the __kmpc_fork_teams: 2109 // - The callback callee is argument number 2 (microtask). 2110 // - The first two arguments of the callback callee are unknown (-1). 2111 // - All variadic arguments to the __kmpc_fork_teams are passed to the 2112 // callback callee. 2113 F->addMetadata( 2114 llvm::LLVMContext::MD_callback, 2115 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 2116 2, {-1, -1}, 2117 /* VarArgsArePassed */ true)})); 2118 } 2119 } 2120 break; 2121 } 2122 case OMPRTL__kmpc_taskloop: { 2123 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 2124 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 2125 // sched, kmp_uint64 grainsize, void *task_dup); 2126 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2127 CGM.IntTy, 2128 CGM.VoidPtrTy, 2129 CGM.IntTy, 2130 CGM.Int64Ty->getPointerTo(), 2131 CGM.Int64Ty->getPointerTo(), 2132 CGM.Int64Ty, 2133 CGM.IntTy, 2134 CGM.IntTy, 2135 CGM.Int64Ty, 2136 CGM.VoidPtrTy}; 2137 auto *FnTy = 2138 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2139 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 2140 break; 2141 } 2142 case OMPRTL__kmpc_doacross_init: { 2143 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 2144 // num_dims, struct kmp_dim *dims); 2145 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2146 CGM.Int32Ty, 2147 CGM.Int32Ty, 2148 CGM.VoidPtrTy}; 2149 auto *FnTy = 2150 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2151 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2152 break; 2153 } 2154 case OMPRTL__kmpc_doacross_fini: { 2155 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2156 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2157 auto *FnTy = 2158 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2159 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2160 break; 2161 } 2162 case OMPRTL__kmpc_doacross_post: { 2163 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2164 // *vec); 2165 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2166 CGM.Int64Ty->getPointerTo()}; 2167 auto *FnTy = 2168 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2169 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2170 break; 2171 } 2172 case OMPRTL__kmpc_doacross_wait: { 2173 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2174 // *vec); 2175 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2176 CGM.Int64Ty->getPointerTo()}; 2177 auto *FnTy = 2178 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2179 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2180 break; 2181 } 2182 case OMPRTL__kmpc_task_reduction_init: { 2183 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void 2184 // *data); 2185 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2186 auto *FnTy = 2187 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2188 RTLFn = 2189 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); 2190 break; 2191 } 2192 case OMPRTL__kmpc_task_reduction_get_th_data: { 2193 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2194 // *d); 2195 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2196 auto *FnTy = 2197 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2198 RTLFn = CGM.CreateRuntimeFunction( 2199 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2200 break; 2201 } 2202 case OMPRTL__kmpc_alloc: { 2203 // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t 2204 // al); omp_allocator_handle_t type is void *. 2205 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy}; 2206 auto *FnTy = 2207 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2208 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc"); 2209 break; 2210 } 2211 case OMPRTL__kmpc_free: { 2212 // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t 2213 // al); omp_allocator_handle_t type is void *. 2214 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2215 auto *FnTy = 2216 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2217 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); 2218 break; 2219 } 2220 case OMPRTL__kmpc_push_target_tripcount: { 2221 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 2222 // size); 2223 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; 2224 llvm::FunctionType *FnTy = 2225 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2226 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); 2227 break; 2228 } 2229 case OMPRTL__tgt_target: { 2230 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2231 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2232 // *arg_types); 2233 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2234 CGM.VoidPtrTy, 2235 CGM.Int32Ty, 2236 CGM.VoidPtrPtrTy, 2237 CGM.VoidPtrPtrTy, 2238 CGM.SizeTy->getPointerTo(), 2239 CGM.Int64Ty->getPointerTo()}; 2240 auto *FnTy = 2241 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2242 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2243 break; 2244 } 2245 case OMPRTL__tgt_target_nowait: { 2246 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2247 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 2248 // int64_t *arg_types); 2249 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2250 CGM.VoidPtrTy, 2251 CGM.Int32Ty, 2252 CGM.VoidPtrPtrTy, 2253 CGM.VoidPtrPtrTy, 2254 CGM.SizeTy->getPointerTo(), 2255 CGM.Int64Ty->getPointerTo()}; 2256 auto *FnTy = 2257 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2258 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2259 break; 2260 } 2261 case OMPRTL__tgt_target_teams: { 2262 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2263 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 2264 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2265 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2266 CGM.VoidPtrTy, 2267 CGM.Int32Ty, 2268 CGM.VoidPtrPtrTy, 2269 CGM.VoidPtrPtrTy, 2270 CGM.SizeTy->getPointerTo(), 2271 CGM.Int64Ty->getPointerTo(), 2272 CGM.Int32Ty, 2273 CGM.Int32Ty}; 2274 auto *FnTy = 2275 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2276 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2277 break; 2278 } 2279 case OMPRTL__tgt_target_teams_nowait: { 2280 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2281 // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t 2282 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2283 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2284 CGM.VoidPtrTy, 2285 CGM.Int32Ty, 2286 CGM.VoidPtrPtrTy, 2287 CGM.VoidPtrPtrTy, 2288 CGM.SizeTy->getPointerTo(), 2289 CGM.Int64Ty->getPointerTo(), 2290 CGM.Int32Ty, 2291 CGM.Int32Ty}; 2292 auto *FnTy = 2293 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2294 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2295 break; 2296 } 2297 case OMPRTL__tgt_register_lib: { 2298 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 2299 QualType ParamTy = 2300 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2301 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2302 auto *FnTy = 2303 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2304 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 2305 break; 2306 } 2307 case OMPRTL__tgt_unregister_lib: { 2308 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 2309 QualType ParamTy = 2310 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2311 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2312 auto *FnTy = 2313 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2314 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 2315 break; 2316 } 2317 case OMPRTL__tgt_target_data_begin: { 2318 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2319 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2320 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2321 CGM.Int32Ty, 2322 CGM.VoidPtrPtrTy, 2323 CGM.VoidPtrPtrTy, 2324 CGM.SizeTy->getPointerTo(), 2325 CGM.Int64Ty->getPointerTo()}; 2326 auto *FnTy = 2327 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2328 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2329 break; 2330 } 2331 case OMPRTL__tgt_target_data_begin_nowait: { 2332 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2333 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2334 // *arg_types); 2335 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2336 CGM.Int32Ty, 2337 CGM.VoidPtrPtrTy, 2338 CGM.VoidPtrPtrTy, 2339 CGM.SizeTy->getPointerTo(), 2340 CGM.Int64Ty->getPointerTo()}; 2341 auto *FnTy = 2342 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2343 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2344 break; 2345 } 2346 case OMPRTL__tgt_target_data_end: { 2347 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2348 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2349 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2350 CGM.Int32Ty, 2351 CGM.VoidPtrPtrTy, 2352 CGM.VoidPtrPtrTy, 2353 CGM.SizeTy->getPointerTo(), 2354 CGM.Int64Ty->getPointerTo()}; 2355 auto *FnTy = 2356 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2357 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2358 break; 2359 } 2360 case OMPRTL__tgt_target_data_end_nowait: { 2361 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2362 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2363 // *arg_types); 2364 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2365 CGM.Int32Ty, 2366 CGM.VoidPtrPtrTy, 2367 CGM.VoidPtrPtrTy, 2368 CGM.SizeTy->getPointerTo(), 2369 CGM.Int64Ty->getPointerTo()}; 2370 auto *FnTy = 2371 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2372 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2373 break; 2374 } 2375 case OMPRTL__tgt_target_data_update: { 2376 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2377 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2378 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2379 CGM.Int32Ty, 2380 CGM.VoidPtrPtrTy, 2381 CGM.VoidPtrPtrTy, 2382 CGM.SizeTy->getPointerTo(), 2383 CGM.Int64Ty->getPointerTo()}; 2384 auto *FnTy = 2385 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2386 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2387 break; 2388 } 2389 case OMPRTL__tgt_target_data_update_nowait: { 2390 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2391 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2392 // *arg_types); 2393 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2394 CGM.Int32Ty, 2395 CGM.VoidPtrPtrTy, 2396 CGM.VoidPtrPtrTy, 2397 CGM.SizeTy->getPointerTo(), 2398 CGM.Int64Ty->getPointerTo()}; 2399 auto *FnTy = 2400 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2401 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2402 break; 2403 } 2404 } 2405 assert(RTLFn && "Unable to find OpenMP runtime function"); 2406 return RTLFn; 2407 } 2408 2409 llvm::FunctionCallee 2410 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 2411 assert((IVSize == 32 || IVSize == 64) && 2412 "IV size is not compatible with the omp runtime"); 2413 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2414 : "__kmpc_for_static_init_4u") 2415 : (IVSigned ? "__kmpc_for_static_init_8" 2416 : "__kmpc_for_static_init_8u"); 2417 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2418 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2419 llvm::Type *TypeParams[] = { 2420 getIdentTyPointerTy(), // loc 2421 CGM.Int32Ty, // tid 2422 CGM.Int32Ty, // schedtype 2423 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2424 PtrTy, // p_lower 2425 PtrTy, // p_upper 2426 PtrTy, // p_stride 2427 ITy, // incr 2428 ITy // chunk 2429 }; 2430 auto *FnTy = 2431 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2432 return CGM.CreateRuntimeFunction(FnTy, Name); 2433 } 2434 2435 llvm::FunctionCallee 2436 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 2437 assert((IVSize == 32 || IVSize == 64) && 2438 "IV size is not compatible with the omp runtime"); 2439 StringRef Name = 2440 IVSize == 32 2441 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2442 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2443 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2444 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2445 CGM.Int32Ty, // tid 2446 CGM.Int32Ty, // schedtype 2447 ITy, // lower 2448 ITy, // upper 2449 ITy, // stride 2450 ITy // chunk 2451 }; 2452 auto *FnTy = 2453 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2454 return CGM.CreateRuntimeFunction(FnTy, Name); 2455 } 2456 2457 llvm::FunctionCallee 2458 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 2459 assert((IVSize == 32 || IVSize == 64) && 2460 "IV size is not compatible with the omp runtime"); 2461 StringRef Name = 2462 IVSize == 32 2463 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2464 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2465 llvm::Type *TypeParams[] = { 2466 getIdentTyPointerTy(), // loc 2467 CGM.Int32Ty, // tid 2468 }; 2469 auto *FnTy = 2470 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2471 return CGM.CreateRuntimeFunction(FnTy, Name); 2472 } 2473 2474 llvm::FunctionCallee 2475 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 2476 assert((IVSize == 32 || IVSize == 64) && 2477 "IV size is not compatible with the omp runtime"); 2478 StringRef Name = 2479 IVSize == 32 2480 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2481 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2482 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2483 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2484 llvm::Type *TypeParams[] = { 2485 getIdentTyPointerTy(), // loc 2486 CGM.Int32Ty, // tid 2487 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2488 PtrTy, // p_lower 2489 PtrTy, // p_upper 2490 PtrTy // p_stride 2491 }; 2492 auto *FnTy = 2493 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2494 return CGM.CreateRuntimeFunction(FnTy, Name); 2495 } 2496 2497 Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) { 2498 if (CGM.getLangOpts().OpenMPSimd) 2499 return Address::invalid(); 2500 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2501 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2502 if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) { 2503 SmallString<64> PtrName; 2504 { 2505 llvm::raw_svector_ostream OS(PtrName); 2506 OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr"; 2507 } 2508 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2509 if (!Ptr) { 2510 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2511 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2512 PtrName); 2513 if (!CGM.getLangOpts().OpenMPIsDevice) { 2514 auto *GV = cast<llvm::GlobalVariable>(Ptr); 2515 GV->setLinkage(llvm::GlobalValue::ExternalLinkage); 2516 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 2517 } 2518 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr)); 2519 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 2520 } 2521 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 2522 } 2523 return Address::invalid(); 2524 } 2525 2526 llvm::Constant * 2527 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2528 assert(!CGM.getLangOpts().OpenMPUseTLS || 2529 !CGM.getContext().getTargetInfo().isTLSSupported()); 2530 // Lookup the entry, lazily creating it if necessary. 2531 std::string Suffix = getName({"cache", ""}); 2532 return getOrCreateInternalVariable( 2533 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 2534 } 2535 2536 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2537 const VarDecl *VD, 2538 Address VDAddr, 2539 SourceLocation Loc) { 2540 if (CGM.getLangOpts().OpenMPUseTLS && 2541 CGM.getContext().getTargetInfo().isTLSSupported()) 2542 return VDAddr; 2543 2544 llvm::Type *VarTy = VDAddr.getElementType(); 2545 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2546 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2547 CGM.Int8PtrTy), 2548 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2549 getOrCreateThreadPrivateCache(VD)}; 2550 return Address(CGF.EmitRuntimeCall( 2551 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2552 VDAddr.getAlignment()); 2553 } 2554 2555 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2556 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2557 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2558 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2559 // library. 2560 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 2561 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2562 OMPLoc); 2563 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2564 // to register constructor/destructor for variable. 2565 llvm::Value *Args[] = { 2566 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 2567 Ctor, CopyCtor, Dtor}; 2568 CGF.EmitRuntimeCall( 2569 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 2570 } 2571 2572 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 2573 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 2574 bool PerformInit, CodeGenFunction *CGF) { 2575 if (CGM.getLangOpts().OpenMPUseTLS && 2576 CGM.getContext().getTargetInfo().isTLSSupported()) 2577 return nullptr; 2578 2579 VD = VD->getDefinition(CGM.getContext()); 2580 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 2581 QualType ASTTy = VD->getType(); 2582 2583 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 2584 const Expr *Init = VD->getAnyInitializer(); 2585 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2586 // Generate function that re-emits the declaration's initializer into the 2587 // threadprivate copy of the variable VD 2588 CodeGenFunction CtorCGF(CGM); 2589 FunctionArgList Args; 2590 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2591 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2592 ImplicitParamDecl::Other); 2593 Args.push_back(&Dst); 2594 2595 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2596 CGM.getContext().VoidPtrTy, Args); 2597 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2598 std::string Name = getName({"__kmpc_global_ctor_", ""}); 2599 llvm::Function *Fn = 2600 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2601 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 2602 Args, Loc, Loc); 2603 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 2604 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2605 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2606 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 2607 Arg = CtorCGF.Builder.CreateElementBitCast( 2608 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 2609 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 2610 /*IsInitializer=*/true); 2611 ArgVal = CtorCGF.EmitLoadOfScalar( 2612 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2613 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2614 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 2615 CtorCGF.FinishFunction(); 2616 Ctor = Fn; 2617 } 2618 if (VD->getType().isDestructedType() != QualType::DK_none) { 2619 // Generate function that emits destructor call for the threadprivate copy 2620 // of the variable VD 2621 CodeGenFunction DtorCGF(CGM); 2622 FunctionArgList Args; 2623 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2624 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2625 ImplicitParamDecl::Other); 2626 Args.push_back(&Dst); 2627 2628 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2629 CGM.getContext().VoidTy, Args); 2630 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2631 std::string Name = getName({"__kmpc_global_dtor_", ""}); 2632 llvm::Function *Fn = 2633 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2634 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2635 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 2636 Loc, Loc); 2637 // Create a scope with an artificial location for the body of this function. 2638 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2639 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 2640 DtorCGF.GetAddrOfLocalVar(&Dst), 2641 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 2642 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 2643 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2644 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2645 DtorCGF.FinishFunction(); 2646 Dtor = Fn; 2647 } 2648 // Do not emit init function if it is not required. 2649 if (!Ctor && !Dtor) 2650 return nullptr; 2651 2652 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2653 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 2654 /*isVarArg=*/false) 2655 ->getPointerTo(); 2656 // Copying constructor for the threadprivate variable. 2657 // Must be NULL - reserved by runtime, but currently it requires that this 2658 // parameter is always NULL. Otherwise it fires assertion. 2659 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 2660 if (Ctor == nullptr) { 2661 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 2662 /*isVarArg=*/false) 2663 ->getPointerTo(); 2664 Ctor = llvm::Constant::getNullValue(CtorTy); 2665 } 2666 if (Dtor == nullptr) { 2667 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 2668 /*isVarArg=*/false) 2669 ->getPointerTo(); 2670 Dtor = llvm::Constant::getNullValue(DtorTy); 2671 } 2672 if (!CGF) { 2673 auto *InitFunctionTy = 2674 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 2675 std::string Name = getName({"__omp_threadprivate_init_", ""}); 2676 llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction( 2677 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 2678 CodeGenFunction InitCGF(CGM); 2679 FunctionArgList ArgList; 2680 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 2681 CGM.getTypes().arrangeNullaryFunction(), ArgList, 2682 Loc, Loc); 2683 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2684 InitCGF.FinishFunction(); 2685 return InitFunction; 2686 } 2687 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2688 } 2689 return nullptr; 2690 } 2691 2692 /// Obtain information that uniquely identifies a target entry. This 2693 /// consists of the file and device IDs as well as line number associated with 2694 /// the relevant entry source location. 2695 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 2696 unsigned &DeviceID, unsigned &FileID, 2697 unsigned &LineNum) { 2698 SourceManager &SM = C.getSourceManager(); 2699 2700 // The loc should be always valid and have a file ID (the user cannot use 2701 // #pragma directives in macros) 2702 2703 assert(Loc.isValid() && "Source location is expected to be always valid."); 2704 2705 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 2706 assert(PLoc.isValid() && "Source location is expected to be always valid."); 2707 2708 llvm::sys::fs::UniqueID ID; 2709 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 2710 SM.getDiagnostics().Report(diag::err_cannot_open_file) 2711 << PLoc.getFilename() << EC.message(); 2712 2713 DeviceID = ID.getDevice(); 2714 FileID = ID.getFile(); 2715 LineNum = PLoc.getLine(); 2716 } 2717 2718 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 2719 llvm::GlobalVariable *Addr, 2720 bool PerformInit) { 2721 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2722 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2723 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) 2724 return CGM.getLangOpts().OpenMPIsDevice; 2725 VD = VD->getDefinition(CGM.getContext()); 2726 if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 2727 return CGM.getLangOpts().OpenMPIsDevice; 2728 2729 QualType ASTTy = VD->getType(); 2730 2731 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 2732 // Produce the unique prefix to identify the new target regions. We use 2733 // the source location of the variable declaration which we know to not 2734 // conflict with any target region. 2735 unsigned DeviceID; 2736 unsigned FileID; 2737 unsigned Line; 2738 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 2739 SmallString<128> Buffer, Out; 2740 { 2741 llvm::raw_svector_ostream OS(Buffer); 2742 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 2743 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 2744 } 2745 2746 const Expr *Init = VD->getAnyInitializer(); 2747 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2748 llvm::Constant *Ctor; 2749 llvm::Constant *ID; 2750 if (CGM.getLangOpts().OpenMPIsDevice) { 2751 // Generate function that re-emits the declaration's initializer into 2752 // the threadprivate copy of the variable VD 2753 CodeGenFunction CtorCGF(CGM); 2754 2755 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2756 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2757 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2758 FTy, Twine(Buffer, "_ctor"), FI, Loc); 2759 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 2760 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2761 FunctionArgList(), Loc, Loc); 2762 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 2763 CtorCGF.EmitAnyExprToMem(Init, 2764 Address(Addr, CGM.getContext().getDeclAlign(VD)), 2765 Init->getType().getQualifiers(), 2766 /*IsInitializer=*/true); 2767 CtorCGF.FinishFunction(); 2768 Ctor = Fn; 2769 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2770 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 2771 } else { 2772 Ctor = new llvm::GlobalVariable( 2773 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2774 llvm::GlobalValue::PrivateLinkage, 2775 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 2776 ID = Ctor; 2777 } 2778 2779 // Register the information for the entry associated with the constructor. 2780 Out.clear(); 2781 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2782 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 2783 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 2784 } 2785 if (VD->getType().isDestructedType() != QualType::DK_none) { 2786 llvm::Constant *Dtor; 2787 llvm::Constant *ID; 2788 if (CGM.getLangOpts().OpenMPIsDevice) { 2789 // Generate function that emits destructor call for the threadprivate 2790 // copy of the variable VD 2791 CodeGenFunction DtorCGF(CGM); 2792 2793 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2794 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2795 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2796 FTy, Twine(Buffer, "_dtor"), FI, Loc); 2797 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2798 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2799 FunctionArgList(), Loc, Loc); 2800 // Create a scope with an artificial location for the body of this 2801 // function. 2802 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2803 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 2804 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2805 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2806 DtorCGF.FinishFunction(); 2807 Dtor = Fn; 2808 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2809 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 2810 } else { 2811 Dtor = new llvm::GlobalVariable( 2812 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2813 llvm::GlobalValue::PrivateLinkage, 2814 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 2815 ID = Dtor; 2816 } 2817 // Register the information for the entry associated with the destructor. 2818 Out.clear(); 2819 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2820 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2821 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2822 } 2823 return CGM.getLangOpts().OpenMPIsDevice; 2824 } 2825 2826 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2827 QualType VarType, 2828 StringRef Name) { 2829 std::string Suffix = getName({"artificial", ""}); 2830 std::string CacheSuffix = getName({"cache", ""}); 2831 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2832 llvm::Value *GAddr = 2833 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2834 llvm::Value *Args[] = { 2835 emitUpdateLocation(CGF, SourceLocation()), 2836 getThreadID(CGF, SourceLocation()), 2837 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2838 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2839 /*IsSigned=*/false), 2840 getOrCreateInternalVariable( 2841 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2842 return Address( 2843 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2844 CGF.EmitRuntimeCall( 2845 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2846 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2847 CGM.getPointerAlign()); 2848 } 2849 2850 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 2851 const RegionCodeGenTy &ThenGen, 2852 const RegionCodeGenTy &ElseGen) { 2853 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2854 2855 // If the condition constant folds and can be elided, try to avoid emitting 2856 // the condition and the dead arm of the if/else. 2857 bool CondConstant; 2858 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2859 if (CondConstant) 2860 ThenGen(CGF); 2861 else 2862 ElseGen(CGF); 2863 return; 2864 } 2865 2866 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2867 // emit the conditional branch. 2868 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2869 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2870 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2871 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2872 2873 // Emit the 'then' code. 2874 CGF.EmitBlock(ThenBlock); 2875 ThenGen(CGF); 2876 CGF.EmitBranch(ContBlock); 2877 // Emit the 'else' code if present. 2878 // There is no need to emit line number for unconditional branch. 2879 (void)ApplyDebugLocation::CreateEmpty(CGF); 2880 CGF.EmitBlock(ElseBlock); 2881 ElseGen(CGF); 2882 // There is no need to emit line number for unconditional branch. 2883 (void)ApplyDebugLocation::CreateEmpty(CGF); 2884 CGF.EmitBranch(ContBlock); 2885 // Emit the continuation block for code after the if. 2886 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2887 } 2888 2889 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2890 llvm::Function *OutlinedFn, 2891 ArrayRef<llvm::Value *> CapturedVars, 2892 const Expr *IfCond) { 2893 if (!CGF.HaveInsertPoint()) 2894 return; 2895 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2896 auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, 2897 PrePostActionTy &) { 2898 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2899 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2900 llvm::Value *Args[] = { 2901 RTLoc, 2902 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2903 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2904 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2905 RealArgs.append(std::begin(Args), std::end(Args)); 2906 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2907 2908 llvm::FunctionCallee RTLFn = 2909 RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); 2910 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2911 }; 2912 auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, 2913 PrePostActionTy &) { 2914 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2915 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2916 // Build calls: 2917 // __kmpc_serialized_parallel(&Loc, GTid); 2918 llvm::Value *Args[] = {RTLoc, ThreadID}; 2919 CGF.EmitRuntimeCall( 2920 RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); 2921 2922 // OutlinedFn(>id, &zero, CapturedStruct); 2923 Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 2924 /*Name*/ ".zero.addr"); 2925 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 2926 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 2927 // ThreadId for serialized parallels is 0. 2928 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 2929 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 2930 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2931 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2932 2933 // __kmpc_end_serialized_parallel(&Loc, GTid); 2934 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2935 CGF.EmitRuntimeCall( 2936 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 2937 EndArgs); 2938 }; 2939 if (IfCond) { 2940 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 2941 } else { 2942 RegionCodeGenTy ThenRCG(ThenGen); 2943 ThenRCG(CGF); 2944 } 2945 } 2946 2947 // If we're inside an (outlined) parallel region, use the region info's 2948 // thread-ID variable (it is passed in a first argument of the outlined function 2949 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2950 // regular serial code region, get thread ID by calling kmp_int32 2951 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2952 // return the address of that temp. 2953 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2954 SourceLocation Loc) { 2955 if (auto *OMPRegionInfo = 2956 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2957 if (OMPRegionInfo->getThreadIDVariable()) 2958 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 2959 2960 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2961 QualType Int32Ty = 2962 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2963 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2964 CGF.EmitStoreOfScalar(ThreadID, 2965 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2966 2967 return ThreadIDTemp; 2968 } 2969 2970 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 2971 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2972 SmallString<256> Buffer; 2973 llvm::raw_svector_ostream Out(Buffer); 2974 Out << Name; 2975 StringRef RuntimeName = Out.str(); 2976 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2977 if (Elem.second) { 2978 assert(Elem.second->getType()->getPointerElementType() == Ty && 2979 "OMP internal variable has different type than requested"); 2980 return &*Elem.second; 2981 } 2982 2983 return Elem.second = new llvm::GlobalVariable( 2984 CGM.getModule(), Ty, /*IsConstant*/ false, 2985 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2986 Elem.first(), /*InsertBefore=*/nullptr, 2987 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2988 } 2989 2990 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2991 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2992 std::string Name = getName({Prefix, "var"}); 2993 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2994 } 2995 2996 namespace { 2997 /// Common pre(post)-action for different OpenMP constructs. 2998 class CommonActionTy final : public PrePostActionTy { 2999 llvm::FunctionCallee EnterCallee; 3000 ArrayRef<llvm::Value *> EnterArgs; 3001 llvm::FunctionCallee ExitCallee; 3002 ArrayRef<llvm::Value *> ExitArgs; 3003 bool Conditional; 3004 llvm::BasicBlock *ContBlock = nullptr; 3005 3006 public: 3007 CommonActionTy(llvm::FunctionCallee EnterCallee, 3008 ArrayRef<llvm::Value *> EnterArgs, 3009 llvm::FunctionCallee ExitCallee, 3010 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 3011 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 3012 ExitArgs(ExitArgs), Conditional(Conditional) {} 3013 void Enter(CodeGenFunction &CGF) override { 3014 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 3015 if (Conditional) { 3016 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 3017 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 3018 ContBlock = CGF.createBasicBlock("omp_if.end"); 3019 // Generate the branch (If-stmt) 3020 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 3021 CGF.EmitBlock(ThenBlock); 3022 } 3023 } 3024 void Done(CodeGenFunction &CGF) { 3025 // Emit the rest of blocks/branches 3026 CGF.EmitBranch(ContBlock); 3027 CGF.EmitBlock(ContBlock, true); 3028 } 3029 void Exit(CodeGenFunction &CGF) override { 3030 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 3031 } 3032 }; 3033 } // anonymous namespace 3034 3035 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 3036 StringRef CriticalName, 3037 const RegionCodeGenTy &CriticalOpGen, 3038 SourceLocation Loc, const Expr *Hint) { 3039 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 3040 // CriticalOpGen(); 3041 // __kmpc_end_critical(ident_t *, gtid, Lock); 3042 // Prepare arguments and build a call to __kmpc_critical 3043 if (!CGF.HaveInsertPoint()) 3044 return; 3045 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3046 getCriticalRegionLock(CriticalName)}; 3047 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 3048 std::end(Args)); 3049 if (Hint) { 3050 EnterArgs.push_back(CGF.Builder.CreateIntCast( 3051 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); 3052 } 3053 CommonActionTy Action( 3054 createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint 3055 : OMPRTL__kmpc_critical), 3056 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); 3057 CriticalOpGen.setAction(Action); 3058 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 3059 } 3060 3061 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 3062 const RegionCodeGenTy &MasterOpGen, 3063 SourceLocation Loc) { 3064 if (!CGF.HaveInsertPoint()) 3065 return; 3066 // if(__kmpc_master(ident_t *, gtid)) { 3067 // MasterOpGen(); 3068 // __kmpc_end_master(ident_t *, gtid); 3069 // } 3070 // Prepare arguments and build a call to __kmpc_master 3071 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3072 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, 3073 createRuntimeFunction(OMPRTL__kmpc_end_master), Args, 3074 /*Conditional=*/true); 3075 MasterOpGen.setAction(Action); 3076 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 3077 Action.Done(CGF); 3078 } 3079 3080 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 3081 SourceLocation Loc) { 3082 if (!CGF.HaveInsertPoint()) 3083 return; 3084 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 3085 llvm::Value *Args[] = { 3086 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3087 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 3088 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 3089 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3090 Region->emitUntiedSwitch(CGF); 3091 } 3092 3093 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 3094 const RegionCodeGenTy &TaskgroupOpGen, 3095 SourceLocation Loc) { 3096 if (!CGF.HaveInsertPoint()) 3097 return; 3098 // __kmpc_taskgroup(ident_t *, gtid); 3099 // TaskgroupOpGen(); 3100 // __kmpc_end_taskgroup(ident_t *, gtid); 3101 // Prepare arguments and build a call to __kmpc_taskgroup 3102 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3103 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, 3104 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 3105 Args); 3106 TaskgroupOpGen.setAction(Action); 3107 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 3108 } 3109 3110 /// Given an array of pointers to variables, project the address of a 3111 /// given variable. 3112 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 3113 unsigned Index, const VarDecl *Var) { 3114 // Pull out the pointer to the variable. 3115 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 3116 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 3117 3118 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 3119 Addr = CGF.Builder.CreateElementBitCast( 3120 Addr, CGF.ConvertTypeForMem(Var->getType())); 3121 return Addr; 3122 } 3123 3124 static llvm::Value *emitCopyprivateCopyFunction( 3125 CodeGenModule &CGM, llvm::Type *ArgsType, 3126 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 3127 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 3128 SourceLocation Loc) { 3129 ASTContext &C = CGM.getContext(); 3130 // void copy_func(void *LHSArg, void *RHSArg); 3131 FunctionArgList Args; 3132 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3133 ImplicitParamDecl::Other); 3134 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3135 ImplicitParamDecl::Other); 3136 Args.push_back(&LHSArg); 3137 Args.push_back(&RHSArg); 3138 const auto &CGFI = 3139 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3140 std::string Name = 3141 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 3142 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 3143 llvm::GlobalValue::InternalLinkage, Name, 3144 &CGM.getModule()); 3145 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 3146 Fn->setDoesNotRecurse(); 3147 CodeGenFunction CGF(CGM); 3148 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 3149 // Dest = (void*[n])(LHSArg); 3150 // Src = (void*[n])(RHSArg); 3151 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3152 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 3153 ArgsType), CGF.getPointerAlign()); 3154 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3155 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 3156 ArgsType), CGF.getPointerAlign()); 3157 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 3158 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 3159 // ... 3160 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 3161 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 3162 const auto *DestVar = 3163 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 3164 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 3165 3166 const auto *SrcVar = 3167 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 3168 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 3169 3170 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 3171 QualType Type = VD->getType(); 3172 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 3173 } 3174 CGF.FinishFunction(); 3175 return Fn; 3176 } 3177 3178 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 3179 const RegionCodeGenTy &SingleOpGen, 3180 SourceLocation Loc, 3181 ArrayRef<const Expr *> CopyprivateVars, 3182 ArrayRef<const Expr *> SrcExprs, 3183 ArrayRef<const Expr *> DstExprs, 3184 ArrayRef<const Expr *> AssignmentOps) { 3185 if (!CGF.HaveInsertPoint()) 3186 return; 3187 assert(CopyprivateVars.size() == SrcExprs.size() && 3188 CopyprivateVars.size() == DstExprs.size() && 3189 CopyprivateVars.size() == AssignmentOps.size()); 3190 ASTContext &C = CGM.getContext(); 3191 // int32 did_it = 0; 3192 // if(__kmpc_single(ident_t *, gtid)) { 3193 // SingleOpGen(); 3194 // __kmpc_end_single(ident_t *, gtid); 3195 // did_it = 1; 3196 // } 3197 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3198 // <copy_func>, did_it); 3199 3200 Address DidIt = Address::invalid(); 3201 if (!CopyprivateVars.empty()) { 3202 // int32 did_it = 0; 3203 QualType KmpInt32Ty = 3204 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 3205 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 3206 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 3207 } 3208 // Prepare arguments and build a call to __kmpc_single 3209 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3210 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, 3211 createRuntimeFunction(OMPRTL__kmpc_end_single), Args, 3212 /*Conditional=*/true); 3213 SingleOpGen.setAction(Action); 3214 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 3215 if (DidIt.isValid()) { 3216 // did_it = 1; 3217 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 3218 } 3219 Action.Done(CGF); 3220 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3221 // <copy_func>, did_it); 3222 if (DidIt.isValid()) { 3223 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 3224 QualType CopyprivateArrayTy = 3225 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 3226 /*IndexTypeQuals=*/0); 3227 // Create a list of all private variables for copyprivate. 3228 Address CopyprivateList = 3229 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 3230 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 3231 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 3232 CGF.Builder.CreateStore( 3233 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3234 CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), 3235 Elem); 3236 } 3237 // Build function that copies private values from single region to all other 3238 // threads in the corresponding parallel region. 3239 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 3240 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 3241 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 3242 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 3243 Address CL = 3244 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 3245 CGF.VoidPtrTy); 3246 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 3247 llvm::Value *Args[] = { 3248 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 3249 getThreadID(CGF, Loc), // i32 <gtid> 3250 BufSize, // size_t <buf_size> 3251 CL.getPointer(), // void *<copyprivate list> 3252 CpyFn, // void (*) (void *, void *) <copy_func> 3253 DidItVal // i32 did_it 3254 }; 3255 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 3256 } 3257 } 3258 3259 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 3260 const RegionCodeGenTy &OrderedOpGen, 3261 SourceLocation Loc, bool IsThreads) { 3262 if (!CGF.HaveInsertPoint()) 3263 return; 3264 // __kmpc_ordered(ident_t *, gtid); 3265 // OrderedOpGen(); 3266 // __kmpc_end_ordered(ident_t *, gtid); 3267 // Prepare arguments and build a call to __kmpc_ordered 3268 if (IsThreads) { 3269 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3270 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, 3271 createRuntimeFunction(OMPRTL__kmpc_end_ordered), 3272 Args); 3273 OrderedOpGen.setAction(Action); 3274 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3275 return; 3276 } 3277 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3278 } 3279 3280 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 3281 unsigned Flags; 3282 if (Kind == OMPD_for) 3283 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 3284 else if (Kind == OMPD_sections) 3285 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 3286 else if (Kind == OMPD_single) 3287 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 3288 else if (Kind == OMPD_barrier) 3289 Flags = OMP_IDENT_BARRIER_EXPL; 3290 else 3291 Flags = OMP_IDENT_BARRIER_IMPL; 3292 return Flags; 3293 } 3294 3295 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 3296 CodeGenFunction &CGF, const OMPLoopDirective &S, 3297 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 3298 // Check if the loop directive is actually a doacross loop directive. In this 3299 // case choose static, 1 schedule. 3300 if (llvm::any_of( 3301 S.getClausesOfKind<OMPOrderedClause>(), 3302 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 3303 ScheduleKind = OMPC_SCHEDULE_static; 3304 // Chunk size is 1 in this case. 3305 llvm::APInt ChunkSize(32, 1); 3306 ChunkExpr = IntegerLiteral::Create( 3307 CGF.getContext(), ChunkSize, 3308 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 3309 SourceLocation()); 3310 } 3311 } 3312 3313 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 3314 OpenMPDirectiveKind Kind, bool EmitChecks, 3315 bool ForceSimpleCall) { 3316 if (!CGF.HaveInsertPoint()) 3317 return; 3318 // Build call __kmpc_cancel_barrier(loc, thread_id); 3319 // Build call __kmpc_barrier(loc, thread_id); 3320 unsigned Flags = getDefaultFlagsForBarriers(Kind); 3321 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 3322 // thread_id); 3323 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 3324 getThreadID(CGF, Loc)}; 3325 if (auto *OMPRegionInfo = 3326 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 3327 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 3328 llvm::Value *Result = CGF.EmitRuntimeCall( 3329 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 3330 if (EmitChecks) { 3331 // if (__kmpc_cancel_barrier()) { 3332 // exit from construct; 3333 // } 3334 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 3335 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 3336 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 3337 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 3338 CGF.EmitBlock(ExitBB); 3339 // exit from construct; 3340 CodeGenFunction::JumpDest CancelDestination = 3341 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 3342 CGF.EmitBranchThroughCleanup(CancelDestination); 3343 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 3344 } 3345 return; 3346 } 3347 } 3348 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 3349 } 3350 3351 /// Map the OpenMP loop schedule to the runtime enumeration. 3352 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 3353 bool Chunked, bool Ordered) { 3354 switch (ScheduleKind) { 3355 case OMPC_SCHEDULE_static: 3356 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 3357 : (Ordered ? OMP_ord_static : OMP_sch_static); 3358 case OMPC_SCHEDULE_dynamic: 3359 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 3360 case OMPC_SCHEDULE_guided: 3361 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 3362 case OMPC_SCHEDULE_runtime: 3363 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 3364 case OMPC_SCHEDULE_auto: 3365 return Ordered ? OMP_ord_auto : OMP_sch_auto; 3366 case OMPC_SCHEDULE_unknown: 3367 assert(!Chunked && "chunk was specified but schedule kind not known"); 3368 return Ordered ? OMP_ord_static : OMP_sch_static; 3369 } 3370 llvm_unreachable("Unexpected runtime schedule"); 3371 } 3372 3373 /// Map the OpenMP distribute schedule to the runtime enumeration. 3374 static OpenMPSchedType 3375 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 3376 // only static is allowed for dist_schedule 3377 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 3378 } 3379 3380 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 3381 bool Chunked) const { 3382 OpenMPSchedType Schedule = 3383 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3384 return Schedule == OMP_sch_static; 3385 } 3386 3387 bool CGOpenMPRuntime::isStaticNonchunked( 3388 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3389 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3390 return Schedule == OMP_dist_sch_static; 3391 } 3392 3393 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 3394 bool Chunked) const { 3395 OpenMPSchedType Schedule = 3396 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3397 return Schedule == OMP_sch_static_chunked; 3398 } 3399 3400 bool CGOpenMPRuntime::isStaticChunked( 3401 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3402 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3403 return Schedule == OMP_dist_sch_static_chunked; 3404 } 3405 3406 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 3407 OpenMPSchedType Schedule = 3408 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 3409 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 3410 return Schedule != OMP_sch_static; 3411 } 3412 3413 static int addMonoNonMonoModifier(OpenMPSchedType Schedule, 3414 OpenMPScheduleClauseModifier M1, 3415 OpenMPScheduleClauseModifier M2) { 3416 int Modifier = 0; 3417 switch (M1) { 3418 case OMPC_SCHEDULE_MODIFIER_monotonic: 3419 Modifier = OMP_sch_modifier_monotonic; 3420 break; 3421 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3422 Modifier = OMP_sch_modifier_nonmonotonic; 3423 break; 3424 case OMPC_SCHEDULE_MODIFIER_simd: 3425 if (Schedule == OMP_sch_static_chunked) 3426 Schedule = OMP_sch_static_balanced_chunked; 3427 break; 3428 case OMPC_SCHEDULE_MODIFIER_last: 3429 case OMPC_SCHEDULE_MODIFIER_unknown: 3430 break; 3431 } 3432 switch (M2) { 3433 case OMPC_SCHEDULE_MODIFIER_monotonic: 3434 Modifier = OMP_sch_modifier_monotonic; 3435 break; 3436 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3437 Modifier = OMP_sch_modifier_nonmonotonic; 3438 break; 3439 case OMPC_SCHEDULE_MODIFIER_simd: 3440 if (Schedule == OMP_sch_static_chunked) 3441 Schedule = OMP_sch_static_balanced_chunked; 3442 break; 3443 case OMPC_SCHEDULE_MODIFIER_last: 3444 case OMPC_SCHEDULE_MODIFIER_unknown: 3445 break; 3446 } 3447 return Schedule | Modifier; 3448 } 3449 3450 void CGOpenMPRuntime::emitForDispatchInit( 3451 CodeGenFunction &CGF, SourceLocation Loc, 3452 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 3453 bool Ordered, const DispatchRTInput &DispatchValues) { 3454 if (!CGF.HaveInsertPoint()) 3455 return; 3456 OpenMPSchedType Schedule = getRuntimeSchedule( 3457 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 3458 assert(Ordered || 3459 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 3460 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 3461 Schedule != OMP_sch_static_balanced_chunked)); 3462 // Call __kmpc_dispatch_init( 3463 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 3464 // kmp_int[32|64] lower, kmp_int[32|64] upper, 3465 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 3466 3467 // If the Chunk was not specified in the clause - use default value 1. 3468 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 3469 : CGF.Builder.getIntN(IVSize, 1); 3470 llvm::Value *Args[] = { 3471 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3472 CGF.Builder.getInt32(addMonoNonMonoModifier( 3473 Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 3474 DispatchValues.LB, // Lower 3475 DispatchValues.UB, // Upper 3476 CGF.Builder.getIntN(IVSize, 1), // Stride 3477 Chunk // Chunk 3478 }; 3479 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 3480 } 3481 3482 static void emitForStaticInitCall( 3483 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 3484 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 3485 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 3486 const CGOpenMPRuntime::StaticRTInput &Values) { 3487 if (!CGF.HaveInsertPoint()) 3488 return; 3489 3490 assert(!Values.Ordered); 3491 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 3492 Schedule == OMP_sch_static_balanced_chunked || 3493 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 3494 Schedule == OMP_dist_sch_static || 3495 Schedule == OMP_dist_sch_static_chunked); 3496 3497 // Call __kmpc_for_static_init( 3498 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 3499 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 3500 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 3501 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 3502 llvm::Value *Chunk = Values.Chunk; 3503 if (Chunk == nullptr) { 3504 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 3505 Schedule == OMP_dist_sch_static) && 3506 "expected static non-chunked schedule"); 3507 // If the Chunk was not specified in the clause - use default value 1. 3508 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 3509 } else { 3510 assert((Schedule == OMP_sch_static_chunked || 3511 Schedule == OMP_sch_static_balanced_chunked || 3512 Schedule == OMP_ord_static_chunked || 3513 Schedule == OMP_dist_sch_static_chunked) && 3514 "expected static chunked schedule"); 3515 } 3516 llvm::Value *Args[] = { 3517 UpdateLocation, 3518 ThreadId, 3519 CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1, 3520 M2)), // Schedule type 3521 Values.IL.getPointer(), // &isLastIter 3522 Values.LB.getPointer(), // &LB 3523 Values.UB.getPointer(), // &UB 3524 Values.ST.getPointer(), // &Stride 3525 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 3526 Chunk // Chunk 3527 }; 3528 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 3529 } 3530 3531 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 3532 SourceLocation Loc, 3533 OpenMPDirectiveKind DKind, 3534 const OpenMPScheduleTy &ScheduleKind, 3535 const StaticRTInput &Values) { 3536 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 3537 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 3538 assert(isOpenMPWorksharingDirective(DKind) && 3539 "Expected loop-based or sections-based directive."); 3540 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 3541 isOpenMPLoopDirective(DKind) 3542 ? OMP_IDENT_WORK_LOOP 3543 : OMP_IDENT_WORK_SECTIONS); 3544 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3545 llvm::FunctionCallee StaticInitFunction = 3546 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3547 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3548 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 3549 } 3550 3551 void CGOpenMPRuntime::emitDistributeStaticInit( 3552 CodeGenFunction &CGF, SourceLocation Loc, 3553 OpenMPDistScheduleClauseKind SchedKind, 3554 const CGOpenMPRuntime::StaticRTInput &Values) { 3555 OpenMPSchedType ScheduleNum = 3556 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 3557 llvm::Value *UpdatedLocation = 3558 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 3559 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3560 llvm::FunctionCallee StaticInitFunction = 3561 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3562 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3563 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 3564 OMPC_SCHEDULE_MODIFIER_unknown, Values); 3565 } 3566 3567 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 3568 SourceLocation Loc, 3569 OpenMPDirectiveKind DKind) { 3570 if (!CGF.HaveInsertPoint()) 3571 return; 3572 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 3573 llvm::Value *Args[] = { 3574 emitUpdateLocation(CGF, Loc, 3575 isOpenMPDistributeDirective(DKind) 3576 ? OMP_IDENT_WORK_DISTRIBUTE 3577 : isOpenMPLoopDirective(DKind) 3578 ? OMP_IDENT_WORK_LOOP 3579 : OMP_IDENT_WORK_SECTIONS), 3580 getThreadID(CGF, Loc)}; 3581 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 3582 Args); 3583 } 3584 3585 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 3586 SourceLocation Loc, 3587 unsigned IVSize, 3588 bool IVSigned) { 3589 if (!CGF.HaveInsertPoint()) 3590 return; 3591 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 3592 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3593 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 3594 } 3595 3596 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 3597 SourceLocation Loc, unsigned IVSize, 3598 bool IVSigned, Address IL, 3599 Address LB, Address UB, 3600 Address ST) { 3601 // Call __kmpc_dispatch_next( 3602 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 3603 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 3604 // kmp_int[32|64] *p_stride); 3605 llvm::Value *Args[] = { 3606 emitUpdateLocation(CGF, Loc), 3607 getThreadID(CGF, Loc), 3608 IL.getPointer(), // &isLastIter 3609 LB.getPointer(), // &Lower 3610 UB.getPointer(), // &Upper 3611 ST.getPointer() // &Stride 3612 }; 3613 llvm::Value *Call = 3614 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 3615 return CGF.EmitScalarConversion( 3616 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 3617 CGF.getContext().BoolTy, Loc); 3618 } 3619 3620 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 3621 llvm::Value *NumThreads, 3622 SourceLocation Loc) { 3623 if (!CGF.HaveInsertPoint()) 3624 return; 3625 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 3626 llvm::Value *Args[] = { 3627 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3628 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 3629 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 3630 Args); 3631 } 3632 3633 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 3634 OpenMPProcBindClauseKind ProcBind, 3635 SourceLocation Loc) { 3636 if (!CGF.HaveInsertPoint()) 3637 return; 3638 // Constants for proc bind value accepted by the runtime. 3639 enum ProcBindTy { 3640 ProcBindFalse = 0, 3641 ProcBindTrue, 3642 ProcBindMaster, 3643 ProcBindClose, 3644 ProcBindSpread, 3645 ProcBindIntel, 3646 ProcBindDefault 3647 } RuntimeProcBind; 3648 switch (ProcBind) { 3649 case OMPC_PROC_BIND_master: 3650 RuntimeProcBind = ProcBindMaster; 3651 break; 3652 case OMPC_PROC_BIND_close: 3653 RuntimeProcBind = ProcBindClose; 3654 break; 3655 case OMPC_PROC_BIND_spread: 3656 RuntimeProcBind = ProcBindSpread; 3657 break; 3658 case OMPC_PROC_BIND_unknown: 3659 llvm_unreachable("Unsupported proc_bind value."); 3660 } 3661 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 3662 llvm::Value *Args[] = { 3663 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3664 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 3665 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 3666 } 3667 3668 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 3669 SourceLocation Loc) { 3670 if (!CGF.HaveInsertPoint()) 3671 return; 3672 // Build call void __kmpc_flush(ident_t *loc) 3673 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 3674 emitUpdateLocation(CGF, Loc)); 3675 } 3676 3677 namespace { 3678 /// Indexes of fields for type kmp_task_t. 3679 enum KmpTaskTFields { 3680 /// List of shared variables. 3681 KmpTaskTShareds, 3682 /// Task routine. 3683 KmpTaskTRoutine, 3684 /// Partition id for the untied tasks. 3685 KmpTaskTPartId, 3686 /// Function with call of destructors for private variables. 3687 Data1, 3688 /// Task priority. 3689 Data2, 3690 /// (Taskloops only) Lower bound. 3691 KmpTaskTLowerBound, 3692 /// (Taskloops only) Upper bound. 3693 KmpTaskTUpperBound, 3694 /// (Taskloops only) Stride. 3695 KmpTaskTStride, 3696 /// (Taskloops only) Is last iteration flag. 3697 KmpTaskTLastIter, 3698 /// (Taskloops only) Reduction data. 3699 KmpTaskTReductions, 3700 }; 3701 } // anonymous namespace 3702 3703 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 3704 return OffloadEntriesTargetRegion.empty() && 3705 OffloadEntriesDeviceGlobalVar.empty(); 3706 } 3707 3708 /// Initialize target region entry. 3709 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3710 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3711 StringRef ParentName, unsigned LineNum, 3712 unsigned Order) { 3713 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3714 "only required for the device " 3715 "code generation."); 3716 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3717 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3718 OMPTargetRegionEntryTargetRegion); 3719 ++OffloadingEntriesNum; 3720 } 3721 3722 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3723 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3724 StringRef ParentName, unsigned LineNum, 3725 llvm::Constant *Addr, llvm::Constant *ID, 3726 OMPTargetRegionEntryKind Flags) { 3727 // If we are emitting code for a target, the entry is already initialized, 3728 // only has to be registered. 3729 if (CGM.getLangOpts().OpenMPIsDevice) { 3730 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { 3731 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3732 DiagnosticsEngine::Error, 3733 "Unable to find target region on line '%0' in the device code."); 3734 CGM.getDiags().Report(DiagID) << LineNum; 3735 return; 3736 } 3737 auto &Entry = 3738 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3739 assert(Entry.isValid() && "Entry not initialized!"); 3740 Entry.setAddress(Addr); 3741 Entry.setID(ID); 3742 Entry.setFlags(Flags); 3743 } else { 3744 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3745 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3746 ++OffloadingEntriesNum; 3747 } 3748 } 3749 3750 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3751 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3752 unsigned LineNum) const { 3753 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3754 if (PerDevice == OffloadEntriesTargetRegion.end()) 3755 return false; 3756 auto PerFile = PerDevice->second.find(FileID); 3757 if (PerFile == PerDevice->second.end()) 3758 return false; 3759 auto PerParentName = PerFile->second.find(ParentName); 3760 if (PerParentName == PerFile->second.end()) 3761 return false; 3762 auto PerLine = PerParentName->second.find(LineNum); 3763 if (PerLine == PerParentName->second.end()) 3764 return false; 3765 // Fail if this entry is already registered. 3766 if (PerLine->second.getAddress() || PerLine->second.getID()) 3767 return false; 3768 return true; 3769 } 3770 3771 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3772 const OffloadTargetRegionEntryInfoActTy &Action) { 3773 // Scan all target region entries and perform the provided action. 3774 for (const auto &D : OffloadEntriesTargetRegion) 3775 for (const auto &F : D.second) 3776 for (const auto &P : F.second) 3777 for (const auto &L : P.second) 3778 Action(D.first, F.first, P.first(), L.first, L.second); 3779 } 3780 3781 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3782 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3783 OMPTargetGlobalVarEntryKind Flags, 3784 unsigned Order) { 3785 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3786 "only required for the device " 3787 "code generation."); 3788 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3789 ++OffloadingEntriesNum; 3790 } 3791 3792 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3793 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3794 CharUnits VarSize, 3795 OMPTargetGlobalVarEntryKind Flags, 3796 llvm::GlobalValue::LinkageTypes Linkage) { 3797 if (CGM.getLangOpts().OpenMPIsDevice) { 3798 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3799 assert(Entry.isValid() && Entry.getFlags() == Flags && 3800 "Entry not initialized!"); 3801 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3802 "Resetting with the new address."); 3803 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3804 if (Entry.getVarSize().isZero()) { 3805 Entry.setVarSize(VarSize); 3806 Entry.setLinkage(Linkage); 3807 } 3808 return; 3809 } 3810 Entry.setVarSize(VarSize); 3811 Entry.setLinkage(Linkage); 3812 Entry.setAddress(Addr); 3813 } else { 3814 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3815 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3816 assert(Entry.isValid() && Entry.getFlags() == Flags && 3817 "Entry not initialized!"); 3818 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3819 "Resetting with the new address."); 3820 if (Entry.getVarSize().isZero()) { 3821 Entry.setVarSize(VarSize); 3822 Entry.setLinkage(Linkage); 3823 } 3824 return; 3825 } 3826 OffloadEntriesDeviceGlobalVar.try_emplace( 3827 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3828 ++OffloadingEntriesNum; 3829 } 3830 } 3831 3832 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3833 actOnDeviceGlobalVarEntriesInfo( 3834 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3835 // Scan all target region entries and perform the provided action. 3836 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3837 Action(E.getKey(), E.getValue()); 3838 } 3839 3840 llvm::Function * 3841 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { 3842 // If we don't have entries or if we are emitting code for the device, we 3843 // don't need to do anything. 3844 if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) 3845 return nullptr; 3846 3847 llvm::Module &M = CGM.getModule(); 3848 ASTContext &C = CGM.getContext(); 3849 3850 // Get list of devices we care about 3851 const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples; 3852 3853 // We should be creating an offloading descriptor only if there are devices 3854 // specified. 3855 assert(!Devices.empty() && "No OpenMP offloading devices??"); 3856 3857 // Create the external variables that will point to the begin and end of the 3858 // host entries section. These will be defined by the linker. 3859 llvm::Type *OffloadEntryTy = 3860 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); 3861 std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"}); 3862 auto *HostEntriesBegin = new llvm::GlobalVariable( 3863 M, OffloadEntryTy, /*isConstant=*/true, 3864 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 3865 EntriesBeginName); 3866 std::string EntriesEndName = getName({"omp_offloading", "entries_end"}); 3867 auto *HostEntriesEnd = 3868 new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true, 3869 llvm::GlobalValue::ExternalLinkage, 3870 /*Initializer=*/nullptr, EntriesEndName); 3871 3872 // Create all device images 3873 auto *DeviceImageTy = cast<llvm::StructType>( 3874 CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); 3875 ConstantInitBuilder DeviceImagesBuilder(CGM); 3876 ConstantArrayBuilder DeviceImagesEntries = 3877 DeviceImagesBuilder.beginArray(DeviceImageTy); 3878 3879 for (const llvm::Triple &Device : Devices) { 3880 StringRef T = Device.getTriple(); 3881 std::string BeginName = getName({"omp_offloading", "img_start", ""}); 3882 auto *ImgBegin = new llvm::GlobalVariable( 3883 M, CGM.Int8Ty, /*isConstant=*/true, 3884 llvm::GlobalValue::ExternalWeakLinkage, 3885 /*Initializer=*/nullptr, Twine(BeginName).concat(T)); 3886 std::string EndName = getName({"omp_offloading", "img_end", ""}); 3887 auto *ImgEnd = new llvm::GlobalVariable( 3888 M, CGM.Int8Ty, /*isConstant=*/true, 3889 llvm::GlobalValue::ExternalWeakLinkage, 3890 /*Initializer=*/nullptr, Twine(EndName).concat(T)); 3891 3892 llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin, 3893 HostEntriesEnd}; 3894 createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data, 3895 DeviceImagesEntries); 3896 } 3897 3898 // Create device images global array. 3899 std::string ImagesName = getName({"omp_offloading", "device_images"}); 3900 llvm::GlobalVariable *DeviceImages = 3901 DeviceImagesEntries.finishAndCreateGlobal(ImagesName, 3902 CGM.getPointerAlign(), 3903 /*isConstant=*/true); 3904 DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3905 3906 // This is a Zero array to be used in the creation of the constant expressions 3907 llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), 3908 llvm::Constant::getNullValue(CGM.Int32Ty)}; 3909 3910 // Create the target region descriptor. 3911 llvm::Constant *Data[] = { 3912 llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()), 3913 llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), 3914 DeviceImages, Index), 3915 HostEntriesBegin, HostEntriesEnd}; 3916 std::string Descriptor = getName({"omp_offloading", "descriptor"}); 3917 llvm::GlobalVariable *Desc = createGlobalStruct( 3918 CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor); 3919 3920 // Emit code to register or unregister the descriptor at execution 3921 // startup or closing, respectively. 3922 3923 llvm::Function *UnRegFn; 3924 { 3925 FunctionArgList Args; 3926 ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); 3927 Args.push_back(&DummyPtr); 3928 3929 CodeGenFunction CGF(CGM); 3930 // Disable debug info for global (de-)initializer because they are not part 3931 // of some particular construct. 3932 CGF.disableDebugInfo(); 3933 const auto &FI = 3934 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3935 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 3936 std::string UnregName = getName({"omp_offloading", "descriptor_unreg"}); 3937 UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI); 3938 CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args); 3939 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), 3940 Desc); 3941 CGF.FinishFunction(); 3942 } 3943 llvm::Function *RegFn; 3944 { 3945 CodeGenFunction CGF(CGM); 3946 // Disable debug info for global (de-)initializer because they are not part 3947 // of some particular construct. 3948 CGF.disableDebugInfo(); 3949 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 3950 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 3951 3952 // Encode offload target triples into the registration function name. It 3953 // will serve as a comdat key for the registration/unregistration code for 3954 // this particular combination of offloading targets. 3955 SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U); 3956 RegFnNameParts[0] = "omp_offloading"; 3957 RegFnNameParts[1] = "descriptor_reg"; 3958 llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2), 3959 [](const llvm::Triple &T) -> const std::string& { 3960 return T.getTriple(); 3961 }); 3962 llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end()); 3963 std::string Descriptor = getName(RegFnNameParts); 3964 RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI); 3965 CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList()); 3966 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); 3967 // Create a variable to drive the registration and unregistration of the 3968 // descriptor, so we can reuse the logic that emits Ctors and Dtors. 3969 ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), 3970 SourceLocation(), nullptr, C.CharTy, 3971 ImplicitParamDecl::Other); 3972 CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); 3973 CGF.FinishFunction(); 3974 } 3975 if (CGM.supportsCOMDAT()) { 3976 // It is sufficient to call registration function only once, so create a 3977 // COMDAT group for registration/unregistration functions and associated 3978 // data. That would reduce startup time and code size. Registration 3979 // function serves as a COMDAT group key. 3980 llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName()); 3981 RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); 3982 RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); 3983 RegFn->setComdat(ComdatKey); 3984 UnRegFn->setComdat(ComdatKey); 3985 DeviceImages->setComdat(ComdatKey); 3986 Desc->setComdat(ComdatKey); 3987 } 3988 return RegFn; 3989 } 3990 3991 void CGOpenMPRuntime::createOffloadEntry( 3992 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3993 llvm::GlobalValue::LinkageTypes Linkage) { 3994 StringRef Name = Addr->getName(); 3995 llvm::Module &M = CGM.getModule(); 3996 llvm::LLVMContext &C = M.getContext(); 3997 3998 // Create constant string with the name. 3999 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 4000 4001 std::string StringName = getName({"omp_offloading", "entry_name"}); 4002 auto *Str = new llvm::GlobalVariable( 4003 M, StrPtrInit->getType(), /*isConstant=*/true, 4004 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 4005 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 4006 4007 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 4008 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 4009 llvm::ConstantInt::get(CGM.SizeTy, Size), 4010 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 4011 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 4012 std::string EntryName = getName({"omp_offloading", "entry", ""}); 4013 llvm::GlobalVariable *Entry = createGlobalStruct( 4014 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 4015 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 4016 4017 // The entry has to be created in the section the linker expects it to be. 4018 std::string Section = getName({"omp_offloading", "entries"}); 4019 Entry->setSection(Section); 4020 } 4021 4022 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 4023 // Emit the offloading entries and metadata so that the device codegen side 4024 // can easily figure out what to emit. The produced metadata looks like 4025 // this: 4026 // 4027 // !omp_offload.info = !{!1, ...} 4028 // 4029 // Right now we only generate metadata for function that contain target 4030 // regions. 4031 4032 // If we do not have entries, we don't need to do anything. 4033 if (OffloadEntriesInfoManager.empty()) 4034 return; 4035 4036 llvm::Module &M = CGM.getModule(); 4037 llvm::LLVMContext &C = M.getContext(); 4038 SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> 4039 OrderedEntries(OffloadEntriesInfoManager.size()); 4040 llvm::SmallVector<StringRef, 16> ParentFunctions( 4041 OffloadEntriesInfoManager.size()); 4042 4043 // Auxiliary methods to create metadata values and strings. 4044 auto &&GetMDInt = [this](unsigned V) { 4045 return llvm::ConstantAsMetadata::get( 4046 llvm::ConstantInt::get(CGM.Int32Ty, V)); 4047 }; 4048 4049 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 4050 4051 // Create the offloading info metadata node. 4052 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 4053 4054 // Create function that emits metadata for each target region entry; 4055 auto &&TargetRegionMetadataEmitter = 4056 [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString]( 4057 unsigned DeviceID, unsigned FileID, StringRef ParentName, 4058 unsigned Line, 4059 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 4060 // Generate metadata for target regions. Each entry of this metadata 4061 // contains: 4062 // - Entry 0 -> Kind of this type of metadata (0). 4063 // - Entry 1 -> Device ID of the file where the entry was identified. 4064 // - Entry 2 -> File ID of the file where the entry was identified. 4065 // - Entry 3 -> Mangled name of the function where the entry was 4066 // identified. 4067 // - Entry 4 -> Line in the file where the entry was identified. 4068 // - Entry 5 -> Order the entry was created. 4069 // The first element of the metadata node is the kind. 4070 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 4071 GetMDInt(FileID), GetMDString(ParentName), 4072 GetMDInt(Line), GetMDInt(E.getOrder())}; 4073 4074 // Save this entry in the right position of the ordered entries array. 4075 OrderedEntries[E.getOrder()] = &E; 4076 ParentFunctions[E.getOrder()] = ParentName; 4077 4078 // Add metadata to the named metadata node. 4079 MD->addOperand(llvm::MDNode::get(C, Ops)); 4080 }; 4081 4082 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 4083 TargetRegionMetadataEmitter); 4084 4085 // Create function that emits metadata for each device global variable entry; 4086 auto &&DeviceGlobalVarMetadataEmitter = 4087 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 4088 MD](StringRef MangledName, 4089 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 4090 &E) { 4091 // Generate metadata for global variables. Each entry of this metadata 4092 // contains: 4093 // - Entry 0 -> Kind of this type of metadata (1). 4094 // - Entry 1 -> Mangled name of the variable. 4095 // - Entry 2 -> Declare target kind. 4096 // - Entry 3 -> Order the entry was created. 4097 // The first element of the metadata node is the kind. 4098 llvm::Metadata *Ops[] = { 4099 GetMDInt(E.getKind()), GetMDString(MangledName), 4100 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 4101 4102 // Save this entry in the right position of the ordered entries array. 4103 OrderedEntries[E.getOrder()] = &E; 4104 4105 // Add metadata to the named metadata node. 4106 MD->addOperand(llvm::MDNode::get(C, Ops)); 4107 }; 4108 4109 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 4110 DeviceGlobalVarMetadataEmitter); 4111 4112 for (const auto *E : OrderedEntries) { 4113 assert(E && "All ordered entries must exist!"); 4114 if (const auto *CE = 4115 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 4116 E)) { 4117 if (!CE->getID() || !CE->getAddress()) { 4118 // Do not blame the entry if the parent funtion is not emitted. 4119 StringRef FnName = ParentFunctions[CE->getOrder()]; 4120 if (!CGM.GetGlobalValue(FnName)) 4121 continue; 4122 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4123 DiagnosticsEngine::Error, 4124 "Offloading entry for target region is incorrect: either the " 4125 "address or the ID is invalid."); 4126 CGM.getDiags().Report(DiagID); 4127 continue; 4128 } 4129 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 4130 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 4131 } else if (const auto *CE = 4132 dyn_cast<OffloadEntriesInfoManagerTy:: 4133 OffloadEntryInfoDeviceGlobalVar>(E)) { 4134 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 4135 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4136 CE->getFlags()); 4137 switch (Flags) { 4138 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 4139 if (!CE->getAddress()) { 4140 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4141 DiagnosticsEngine::Error, 4142 "Offloading entry for declare target variable is incorrect: the " 4143 "address is invalid."); 4144 CGM.getDiags().Report(DiagID); 4145 continue; 4146 } 4147 // The vaiable has no definition - no need to add the entry. 4148 if (CE->getVarSize().isZero()) 4149 continue; 4150 break; 4151 } 4152 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 4153 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 4154 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 4155 "Declaret target link address is set."); 4156 if (CGM.getLangOpts().OpenMPIsDevice) 4157 continue; 4158 if (!CE->getAddress()) { 4159 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4160 DiagnosticsEngine::Error, 4161 "Offloading entry for declare target variable is incorrect: the " 4162 "address is invalid."); 4163 CGM.getDiags().Report(DiagID); 4164 continue; 4165 } 4166 break; 4167 } 4168 createOffloadEntry(CE->getAddress(), CE->getAddress(), 4169 CE->getVarSize().getQuantity(), Flags, 4170 CE->getLinkage()); 4171 } else { 4172 llvm_unreachable("Unsupported entry kind."); 4173 } 4174 } 4175 } 4176 4177 /// Loads all the offload entries information from the host IR 4178 /// metadata. 4179 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 4180 // If we are in target mode, load the metadata from the host IR. This code has 4181 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 4182 4183 if (!CGM.getLangOpts().OpenMPIsDevice) 4184 return; 4185 4186 if (CGM.getLangOpts().OMPHostIRFile.empty()) 4187 return; 4188 4189 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 4190 if (auto EC = Buf.getError()) { 4191 CGM.getDiags().Report(diag::err_cannot_open_file) 4192 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4193 return; 4194 } 4195 4196 llvm::LLVMContext C; 4197 auto ME = expectedToErrorOrAndEmitErrors( 4198 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 4199 4200 if (auto EC = ME.getError()) { 4201 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4202 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 4203 CGM.getDiags().Report(DiagID) 4204 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4205 return; 4206 } 4207 4208 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 4209 if (!MD) 4210 return; 4211 4212 for (llvm::MDNode *MN : MD->operands()) { 4213 auto &&GetMDInt = [MN](unsigned Idx) { 4214 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 4215 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 4216 }; 4217 4218 auto &&GetMDString = [MN](unsigned Idx) { 4219 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 4220 return V->getString(); 4221 }; 4222 4223 switch (GetMDInt(0)) { 4224 default: 4225 llvm_unreachable("Unexpected metadata!"); 4226 break; 4227 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4228 OffloadingEntryInfoTargetRegion: 4229 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 4230 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 4231 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 4232 /*Order=*/GetMDInt(5)); 4233 break; 4234 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4235 OffloadingEntryInfoDeviceGlobalVar: 4236 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 4237 /*MangledName=*/GetMDString(1), 4238 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4239 /*Flags=*/GetMDInt(2)), 4240 /*Order=*/GetMDInt(3)); 4241 break; 4242 } 4243 } 4244 } 4245 4246 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 4247 if (!KmpRoutineEntryPtrTy) { 4248 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 4249 ASTContext &C = CGM.getContext(); 4250 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 4251 FunctionProtoType::ExtProtoInfo EPI; 4252 KmpRoutineEntryPtrQTy = C.getPointerType( 4253 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 4254 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 4255 } 4256 } 4257 4258 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 4259 // Make sure the type of the entry is already created. This is the type we 4260 // have to create: 4261 // struct __tgt_offload_entry{ 4262 // void *addr; // Pointer to the offload entry info. 4263 // // (function or global) 4264 // char *name; // Name of the function or global. 4265 // size_t size; // Size of the entry info (0 if it a function). 4266 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 4267 // int32_t reserved; // Reserved, to use by the runtime library. 4268 // }; 4269 if (TgtOffloadEntryQTy.isNull()) { 4270 ASTContext &C = CGM.getContext(); 4271 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 4272 RD->startDefinition(); 4273 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4274 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 4275 addFieldToRecordDecl(C, RD, C.getSizeType()); 4276 addFieldToRecordDecl( 4277 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4278 addFieldToRecordDecl( 4279 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4280 RD->completeDefinition(); 4281 RD->addAttr(PackedAttr::CreateImplicit(C)); 4282 TgtOffloadEntryQTy = C.getRecordType(RD); 4283 } 4284 return TgtOffloadEntryQTy; 4285 } 4286 4287 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { 4288 // These are the types we need to build: 4289 // struct __tgt_device_image{ 4290 // void *ImageStart; // Pointer to the target code start. 4291 // void *ImageEnd; // Pointer to the target code end. 4292 // // We also add the host entries to the device image, as it may be useful 4293 // // for the target runtime to have access to that information. 4294 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all 4295 // // the entries. 4296 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4297 // // entries (non inclusive). 4298 // }; 4299 if (TgtDeviceImageQTy.isNull()) { 4300 ASTContext &C = CGM.getContext(); 4301 RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image"); 4302 RD->startDefinition(); 4303 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4304 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4305 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4306 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4307 RD->completeDefinition(); 4308 TgtDeviceImageQTy = C.getRecordType(RD); 4309 } 4310 return TgtDeviceImageQTy; 4311 } 4312 4313 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { 4314 // struct __tgt_bin_desc{ 4315 // int32_t NumDevices; // Number of devices supported. 4316 // __tgt_device_image *DeviceImages; // Arrays of device images 4317 // // (one per device). 4318 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the 4319 // // entries. 4320 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4321 // // entries (non inclusive). 4322 // }; 4323 if (TgtBinaryDescriptorQTy.isNull()) { 4324 ASTContext &C = CGM.getContext(); 4325 RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc"); 4326 RD->startDefinition(); 4327 addFieldToRecordDecl( 4328 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4329 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); 4330 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4331 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4332 RD->completeDefinition(); 4333 TgtBinaryDescriptorQTy = C.getRecordType(RD); 4334 } 4335 return TgtBinaryDescriptorQTy; 4336 } 4337 4338 namespace { 4339 struct PrivateHelpersTy { 4340 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 4341 const VarDecl *PrivateElemInit) 4342 : Original(Original), PrivateCopy(PrivateCopy), 4343 PrivateElemInit(PrivateElemInit) {} 4344 const VarDecl *Original; 4345 const VarDecl *PrivateCopy; 4346 const VarDecl *PrivateElemInit; 4347 }; 4348 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 4349 } // anonymous namespace 4350 4351 static RecordDecl * 4352 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 4353 if (!Privates.empty()) { 4354 ASTContext &C = CGM.getContext(); 4355 // Build struct .kmp_privates_t. { 4356 // /* private vars */ 4357 // }; 4358 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 4359 RD->startDefinition(); 4360 for (const auto &Pair : Privates) { 4361 const VarDecl *VD = Pair.second.Original; 4362 QualType Type = VD->getType().getNonReferenceType(); 4363 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 4364 if (VD->hasAttrs()) { 4365 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 4366 E(VD->getAttrs().end()); 4367 I != E; ++I) 4368 FD->addAttr(*I); 4369 } 4370 } 4371 RD->completeDefinition(); 4372 return RD; 4373 } 4374 return nullptr; 4375 } 4376 4377 static RecordDecl * 4378 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 4379 QualType KmpInt32Ty, 4380 QualType KmpRoutineEntryPointerQTy) { 4381 ASTContext &C = CGM.getContext(); 4382 // Build struct kmp_task_t { 4383 // void * shareds; 4384 // kmp_routine_entry_t routine; 4385 // kmp_int32 part_id; 4386 // kmp_cmplrdata_t data1; 4387 // kmp_cmplrdata_t data2; 4388 // For taskloops additional fields: 4389 // kmp_uint64 lb; 4390 // kmp_uint64 ub; 4391 // kmp_int64 st; 4392 // kmp_int32 liter; 4393 // void * reductions; 4394 // }; 4395 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 4396 UD->startDefinition(); 4397 addFieldToRecordDecl(C, UD, KmpInt32Ty); 4398 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 4399 UD->completeDefinition(); 4400 QualType KmpCmplrdataTy = C.getRecordType(UD); 4401 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 4402 RD->startDefinition(); 4403 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4404 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 4405 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4406 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4407 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4408 if (isOpenMPTaskLoopDirective(Kind)) { 4409 QualType KmpUInt64Ty = 4410 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 4411 QualType KmpInt64Ty = 4412 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 4413 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4414 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4415 addFieldToRecordDecl(C, RD, KmpInt64Ty); 4416 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4417 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4418 } 4419 RD->completeDefinition(); 4420 return RD; 4421 } 4422 4423 static RecordDecl * 4424 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 4425 ArrayRef<PrivateDataTy> Privates) { 4426 ASTContext &C = CGM.getContext(); 4427 // Build struct kmp_task_t_with_privates { 4428 // kmp_task_t task_data; 4429 // .kmp_privates_t. privates; 4430 // }; 4431 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 4432 RD->startDefinition(); 4433 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 4434 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 4435 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 4436 RD->completeDefinition(); 4437 return RD; 4438 } 4439 4440 /// Emit a proxy function which accepts kmp_task_t as the second 4441 /// argument. 4442 /// \code 4443 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 4444 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 4445 /// For taskloops: 4446 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4447 /// tt->reductions, tt->shareds); 4448 /// return 0; 4449 /// } 4450 /// \endcode 4451 static llvm::Function * 4452 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 4453 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 4454 QualType KmpTaskTWithPrivatesPtrQTy, 4455 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 4456 QualType SharedsPtrTy, llvm::Function *TaskFunction, 4457 llvm::Value *TaskPrivatesMap) { 4458 ASTContext &C = CGM.getContext(); 4459 FunctionArgList Args; 4460 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4461 ImplicitParamDecl::Other); 4462 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4463 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4464 ImplicitParamDecl::Other); 4465 Args.push_back(&GtidArg); 4466 Args.push_back(&TaskTypeArg); 4467 const auto &TaskEntryFnInfo = 4468 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4469 llvm::FunctionType *TaskEntryTy = 4470 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 4471 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 4472 auto *TaskEntry = llvm::Function::Create( 4473 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4474 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 4475 TaskEntry->setDoesNotRecurse(); 4476 CodeGenFunction CGF(CGM); 4477 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 4478 Loc, Loc); 4479 4480 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 4481 // tt, 4482 // For taskloops: 4483 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4484 // tt->task_data.shareds); 4485 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 4486 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 4487 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4488 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4489 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4490 const auto *KmpTaskTWithPrivatesQTyRD = 4491 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4492 LValue Base = 4493 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4494 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4495 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 4496 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 4497 llvm::Value *PartidParam = PartIdLVal.getPointer(); 4498 4499 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 4500 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 4501 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4502 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 4503 CGF.ConvertTypeForMem(SharedsPtrTy)); 4504 4505 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4506 llvm::Value *PrivatesParam; 4507 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 4508 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 4509 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4510 PrivatesLVal.getPointer(), CGF.VoidPtrTy); 4511 } else { 4512 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4513 } 4514 4515 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 4516 TaskPrivatesMap, 4517 CGF.Builder 4518 .CreatePointerBitCastOrAddrSpaceCast( 4519 TDBase.getAddress(), CGF.VoidPtrTy) 4520 .getPointer()}; 4521 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 4522 std::end(CommonArgs)); 4523 if (isOpenMPTaskLoopDirective(Kind)) { 4524 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 4525 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 4526 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 4527 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 4528 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 4529 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 4530 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 4531 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 4532 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 4533 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4534 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4535 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 4536 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 4537 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 4538 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 4539 CallArgs.push_back(LBParam); 4540 CallArgs.push_back(UBParam); 4541 CallArgs.push_back(StParam); 4542 CallArgs.push_back(LIParam); 4543 CallArgs.push_back(RParam); 4544 } 4545 CallArgs.push_back(SharedsParam); 4546 4547 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 4548 CallArgs); 4549 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 4550 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 4551 CGF.FinishFunction(); 4552 return TaskEntry; 4553 } 4554 4555 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 4556 SourceLocation Loc, 4557 QualType KmpInt32Ty, 4558 QualType KmpTaskTWithPrivatesPtrQTy, 4559 QualType KmpTaskTWithPrivatesQTy) { 4560 ASTContext &C = CGM.getContext(); 4561 FunctionArgList Args; 4562 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4563 ImplicitParamDecl::Other); 4564 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4565 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4566 ImplicitParamDecl::Other); 4567 Args.push_back(&GtidArg); 4568 Args.push_back(&TaskTypeArg); 4569 const auto &DestructorFnInfo = 4570 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4571 llvm::FunctionType *DestructorFnTy = 4572 CGM.getTypes().GetFunctionType(DestructorFnInfo); 4573 std::string Name = 4574 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 4575 auto *DestructorFn = 4576 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 4577 Name, &CGM.getModule()); 4578 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 4579 DestructorFnInfo); 4580 DestructorFn->setDoesNotRecurse(); 4581 CodeGenFunction CGF(CGM); 4582 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 4583 Args, Loc, Loc); 4584 4585 LValue Base = CGF.EmitLoadOfPointerLValue( 4586 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4587 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4588 const auto *KmpTaskTWithPrivatesQTyRD = 4589 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4590 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4591 Base = CGF.EmitLValueForField(Base, *FI); 4592 for (const auto *Field : 4593 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 4594 if (QualType::DestructionKind DtorKind = 4595 Field->getType().isDestructedType()) { 4596 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 4597 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 4598 } 4599 } 4600 CGF.FinishFunction(); 4601 return DestructorFn; 4602 } 4603 4604 /// Emit a privates mapping function for correct handling of private and 4605 /// firstprivate variables. 4606 /// \code 4607 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 4608 /// **noalias priv1,..., <tyn> **noalias privn) { 4609 /// *priv1 = &.privates.priv1; 4610 /// ...; 4611 /// *privn = &.privates.privn; 4612 /// } 4613 /// \endcode 4614 static llvm::Value * 4615 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 4616 ArrayRef<const Expr *> PrivateVars, 4617 ArrayRef<const Expr *> FirstprivateVars, 4618 ArrayRef<const Expr *> LastprivateVars, 4619 QualType PrivatesQTy, 4620 ArrayRef<PrivateDataTy> Privates) { 4621 ASTContext &C = CGM.getContext(); 4622 FunctionArgList Args; 4623 ImplicitParamDecl TaskPrivatesArg( 4624 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4625 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 4626 ImplicitParamDecl::Other); 4627 Args.push_back(&TaskPrivatesArg); 4628 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 4629 unsigned Counter = 1; 4630 for (const Expr *E : PrivateVars) { 4631 Args.push_back(ImplicitParamDecl::Create( 4632 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4633 C.getPointerType(C.getPointerType(E->getType())) 4634 .withConst() 4635 .withRestrict(), 4636 ImplicitParamDecl::Other)); 4637 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4638 PrivateVarsPos[VD] = Counter; 4639 ++Counter; 4640 } 4641 for (const Expr *E : FirstprivateVars) { 4642 Args.push_back(ImplicitParamDecl::Create( 4643 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4644 C.getPointerType(C.getPointerType(E->getType())) 4645 .withConst() 4646 .withRestrict(), 4647 ImplicitParamDecl::Other)); 4648 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4649 PrivateVarsPos[VD] = Counter; 4650 ++Counter; 4651 } 4652 for (const Expr *E : LastprivateVars) { 4653 Args.push_back(ImplicitParamDecl::Create( 4654 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4655 C.getPointerType(C.getPointerType(E->getType())) 4656 .withConst() 4657 .withRestrict(), 4658 ImplicitParamDecl::Other)); 4659 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4660 PrivateVarsPos[VD] = Counter; 4661 ++Counter; 4662 } 4663 const auto &TaskPrivatesMapFnInfo = 4664 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4665 llvm::FunctionType *TaskPrivatesMapTy = 4666 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 4667 std::string Name = 4668 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 4669 auto *TaskPrivatesMap = llvm::Function::Create( 4670 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 4671 &CGM.getModule()); 4672 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 4673 TaskPrivatesMapFnInfo); 4674 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 4675 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 4676 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 4677 CodeGenFunction CGF(CGM); 4678 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 4679 TaskPrivatesMapFnInfo, Args, Loc, Loc); 4680 4681 // *privi = &.privates.privi; 4682 LValue Base = CGF.EmitLoadOfPointerLValue( 4683 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 4684 TaskPrivatesArg.getType()->castAs<PointerType>()); 4685 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 4686 Counter = 0; 4687 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 4688 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 4689 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4690 LValue RefLVal = 4691 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4692 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4693 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 4694 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 4695 ++Counter; 4696 } 4697 CGF.FinishFunction(); 4698 return TaskPrivatesMap; 4699 } 4700 4701 /// Emit initialization for private variables in task-based directives. 4702 static void emitPrivatesInit(CodeGenFunction &CGF, 4703 const OMPExecutableDirective &D, 4704 Address KmpTaskSharedsPtr, LValue TDBase, 4705 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4706 QualType SharedsTy, QualType SharedsPtrTy, 4707 const OMPTaskDataTy &Data, 4708 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 4709 ASTContext &C = CGF.getContext(); 4710 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4711 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 4712 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 4713 ? OMPD_taskloop 4714 : OMPD_task; 4715 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 4716 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 4717 LValue SrcBase; 4718 bool IsTargetTask = 4719 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 4720 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 4721 // For target-based directives skip 3 firstprivate arrays BasePointersArray, 4722 // PointersArray and SizesArray. The original variables for these arrays are 4723 // not captured and we get their addresses explicitly. 4724 if ((!IsTargetTask && !Data.FirstprivateVars.empty()) || 4725 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 4726 SrcBase = CGF.MakeAddrLValue( 4727 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4728 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 4729 SharedsTy); 4730 } 4731 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 4732 for (const PrivateDataTy &Pair : Privates) { 4733 const VarDecl *VD = Pair.second.PrivateCopy; 4734 const Expr *Init = VD->getAnyInitializer(); 4735 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 4736 !CGF.isTrivialInitializer(Init)))) { 4737 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 4738 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 4739 const VarDecl *OriginalVD = Pair.second.Original; 4740 // Check if the variable is the target-based BasePointersArray, 4741 // PointersArray or SizesArray. 4742 LValue SharedRefLValue; 4743 QualType Type = PrivateLValue.getType(); 4744 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 4745 if (IsTargetTask && !SharedField) { 4746 assert(isa<ImplicitParamDecl>(OriginalVD) && 4747 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 4748 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4749 ->getNumParams() == 0 && 4750 isa<TranslationUnitDecl>( 4751 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4752 ->getDeclContext()) && 4753 "Expected artificial target data variable."); 4754 SharedRefLValue = 4755 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 4756 } else { 4757 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 4758 SharedRefLValue = CGF.MakeAddrLValue( 4759 Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), 4760 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 4761 SharedRefLValue.getTBAAInfo()); 4762 } 4763 if (Type->isArrayType()) { 4764 // Initialize firstprivate array. 4765 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 4766 // Perform simple memcpy. 4767 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 4768 } else { 4769 // Initialize firstprivate array using element-by-element 4770 // initialization. 4771 CGF.EmitOMPAggregateAssign( 4772 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, 4773 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 4774 Address SrcElement) { 4775 // Clean up any temporaries needed by the initialization. 4776 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4777 InitScope.addPrivate( 4778 Elem, [SrcElement]() -> Address { return SrcElement; }); 4779 (void)InitScope.Privatize(); 4780 // Emit initialization for single element. 4781 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 4782 CGF, &CapturesInfo); 4783 CGF.EmitAnyExprToMem(Init, DestElement, 4784 Init->getType().getQualifiers(), 4785 /*IsInitializer=*/false); 4786 }); 4787 } 4788 } else { 4789 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4790 InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { 4791 return SharedRefLValue.getAddress(); 4792 }); 4793 (void)InitScope.Privatize(); 4794 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 4795 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 4796 /*capturedByInit=*/false); 4797 } 4798 } else { 4799 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 4800 } 4801 } 4802 ++FI; 4803 } 4804 } 4805 4806 /// Check if duplication function is required for taskloops. 4807 static bool checkInitIsRequired(CodeGenFunction &CGF, 4808 ArrayRef<PrivateDataTy> Privates) { 4809 bool InitRequired = false; 4810 for (const PrivateDataTy &Pair : Privates) { 4811 const VarDecl *VD = Pair.second.PrivateCopy; 4812 const Expr *Init = VD->getAnyInitializer(); 4813 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4814 !CGF.isTrivialInitializer(Init)); 4815 if (InitRequired) 4816 break; 4817 } 4818 return InitRequired; 4819 } 4820 4821 4822 /// Emit task_dup function (for initialization of 4823 /// private/firstprivate/lastprivate vars and last_iter flag) 4824 /// \code 4825 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4826 /// lastpriv) { 4827 /// // setup lastprivate flag 4828 /// task_dst->last = lastpriv; 4829 /// // could be constructor calls here... 4830 /// } 4831 /// \endcode 4832 static llvm::Value * 4833 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4834 const OMPExecutableDirective &D, 4835 QualType KmpTaskTWithPrivatesPtrQTy, 4836 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4837 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4838 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4839 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4840 ASTContext &C = CGM.getContext(); 4841 FunctionArgList Args; 4842 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4843 KmpTaskTWithPrivatesPtrQTy, 4844 ImplicitParamDecl::Other); 4845 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4846 KmpTaskTWithPrivatesPtrQTy, 4847 ImplicitParamDecl::Other); 4848 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4849 ImplicitParamDecl::Other); 4850 Args.push_back(&DstArg); 4851 Args.push_back(&SrcArg); 4852 Args.push_back(&LastprivArg); 4853 const auto &TaskDupFnInfo = 4854 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4855 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 4856 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4857 auto *TaskDup = llvm::Function::Create( 4858 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4859 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4860 TaskDup->setDoesNotRecurse(); 4861 CodeGenFunction CGF(CGM); 4862 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4863 Loc); 4864 4865 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4866 CGF.GetAddrOfLocalVar(&DstArg), 4867 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4868 // task_dst->liter = lastpriv; 4869 if (WithLastIter) { 4870 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4871 LValue Base = CGF.EmitLValueForField( 4872 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4873 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4874 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4875 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4876 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4877 } 4878 4879 // Emit initial values for private copies (if any). 4880 assert(!Privates.empty()); 4881 Address KmpTaskSharedsPtr = Address::invalid(); 4882 if (!Data.FirstprivateVars.empty()) { 4883 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4884 CGF.GetAddrOfLocalVar(&SrcArg), 4885 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4886 LValue Base = CGF.EmitLValueForField( 4887 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4888 KmpTaskSharedsPtr = Address( 4889 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4890 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4891 KmpTaskTShareds)), 4892 Loc), 4893 CGF.getNaturalTypeAlignment(SharedsTy)); 4894 } 4895 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4896 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4897 CGF.FinishFunction(); 4898 return TaskDup; 4899 } 4900 4901 /// Checks if destructor function is required to be generated. 4902 /// \return true if cleanups are required, false otherwise. 4903 static bool 4904 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 4905 bool NeedsCleanup = false; 4906 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4907 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 4908 for (const FieldDecl *FD : PrivateRD->fields()) { 4909 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 4910 if (NeedsCleanup) 4911 break; 4912 } 4913 return NeedsCleanup; 4914 } 4915 4916 CGOpenMPRuntime::TaskResultTy 4917 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4918 const OMPExecutableDirective &D, 4919 llvm::Function *TaskFunction, QualType SharedsTy, 4920 Address Shareds, const OMPTaskDataTy &Data) { 4921 ASTContext &C = CGM.getContext(); 4922 llvm::SmallVector<PrivateDataTy, 4> Privates; 4923 // Aggregate privates and sort them by the alignment. 4924 auto I = Data.PrivateCopies.begin(); 4925 for (const Expr *E : Data.PrivateVars) { 4926 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4927 Privates.emplace_back( 4928 C.getDeclAlign(VD), 4929 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4930 /*PrivateElemInit=*/nullptr)); 4931 ++I; 4932 } 4933 I = Data.FirstprivateCopies.begin(); 4934 auto IElemInitRef = Data.FirstprivateInits.begin(); 4935 for (const Expr *E : Data.FirstprivateVars) { 4936 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4937 Privates.emplace_back( 4938 C.getDeclAlign(VD), 4939 PrivateHelpersTy( 4940 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4941 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4942 ++I; 4943 ++IElemInitRef; 4944 } 4945 I = Data.LastprivateCopies.begin(); 4946 for (const Expr *E : Data.LastprivateVars) { 4947 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4948 Privates.emplace_back( 4949 C.getDeclAlign(VD), 4950 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4951 /*PrivateElemInit=*/nullptr)); 4952 ++I; 4953 } 4954 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { 4955 return L.first > R.first; 4956 }); 4957 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4958 // Build type kmp_routine_entry_t (if not built yet). 4959 emitKmpRoutineEntryT(KmpInt32Ty); 4960 // Build type kmp_task_t (if not built yet). 4961 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4962 if (SavedKmpTaskloopTQTy.isNull()) { 4963 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4964 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4965 } 4966 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4967 } else { 4968 assert((D.getDirectiveKind() == OMPD_task || 4969 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4970 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4971 "Expected taskloop, task or target directive"); 4972 if (SavedKmpTaskTQTy.isNull()) { 4973 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4974 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4975 } 4976 KmpTaskTQTy = SavedKmpTaskTQTy; 4977 } 4978 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4979 // Build particular struct kmp_task_t for the given task. 4980 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4981 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4982 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4983 QualType KmpTaskTWithPrivatesPtrQTy = 4984 C.getPointerType(KmpTaskTWithPrivatesQTy); 4985 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4986 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4987 KmpTaskTWithPrivatesTy->getPointerTo(); 4988 llvm::Value *KmpTaskTWithPrivatesTySize = 4989 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4990 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4991 4992 // Emit initial values for private copies (if any). 4993 llvm::Value *TaskPrivatesMap = nullptr; 4994 llvm::Type *TaskPrivatesMapTy = 4995 std::next(TaskFunction->arg_begin(), 3)->getType(); 4996 if (!Privates.empty()) { 4997 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4998 TaskPrivatesMap = emitTaskPrivateMappingFunction( 4999 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 5000 FI->getType(), Privates); 5001 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5002 TaskPrivatesMap, TaskPrivatesMapTy); 5003 } else { 5004 TaskPrivatesMap = llvm::ConstantPointerNull::get( 5005 cast<llvm::PointerType>(TaskPrivatesMapTy)); 5006 } 5007 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 5008 // kmp_task_t *tt); 5009 llvm::Function *TaskEntry = emitProxyTaskFunction( 5010 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5011 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 5012 TaskPrivatesMap); 5013 5014 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 5015 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 5016 // kmp_routine_entry_t *task_entry); 5017 // Task flags. Format is taken from 5018 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 5019 // description of kmp_tasking_flags struct. 5020 enum { 5021 TiedFlag = 0x1, 5022 FinalFlag = 0x2, 5023 DestructorsFlag = 0x8, 5024 PriorityFlag = 0x20 5025 }; 5026 unsigned Flags = Data.Tied ? TiedFlag : 0; 5027 bool NeedsCleanup = false; 5028 if (!Privates.empty()) { 5029 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 5030 if (NeedsCleanup) 5031 Flags = Flags | DestructorsFlag; 5032 } 5033 if (Data.Priority.getInt()) 5034 Flags = Flags | PriorityFlag; 5035 llvm::Value *TaskFlags = 5036 Data.Final.getPointer() 5037 ? CGF.Builder.CreateSelect(Data.Final.getPointer(), 5038 CGF.Builder.getInt32(FinalFlag), 5039 CGF.Builder.getInt32(/*C=*/0)) 5040 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 5041 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 5042 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 5043 llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), 5044 getThreadID(CGF, Loc), TaskFlags, 5045 KmpTaskTWithPrivatesTySize, SharedsSize, 5046 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5047 TaskEntry, KmpRoutineEntryPtrTy)}; 5048 llvm::Value *NewTask = CGF.EmitRuntimeCall( 5049 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 5050 llvm::Value *NewTaskNewTaskTTy = 5051 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5052 NewTask, KmpTaskTWithPrivatesPtrTy); 5053 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 5054 KmpTaskTWithPrivatesQTy); 5055 LValue TDBase = 5056 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 5057 // Fill the data in the resulting kmp_task_t record. 5058 // Copy shareds if there are any. 5059 Address KmpTaskSharedsPtr = Address::invalid(); 5060 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 5061 KmpTaskSharedsPtr = 5062 Address(CGF.EmitLoadOfScalar( 5063 CGF.EmitLValueForField( 5064 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 5065 KmpTaskTShareds)), 5066 Loc), 5067 CGF.getNaturalTypeAlignment(SharedsTy)); 5068 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 5069 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 5070 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 5071 } 5072 // Emit initial values for private copies (if any). 5073 TaskResultTy Result; 5074 if (!Privates.empty()) { 5075 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 5076 SharedsTy, SharedsPtrTy, Data, Privates, 5077 /*ForDup=*/false); 5078 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 5079 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 5080 Result.TaskDupFn = emitTaskDupFunction( 5081 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 5082 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 5083 /*WithLastIter=*/!Data.LastprivateVars.empty()); 5084 } 5085 } 5086 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 5087 enum { Priority = 0, Destructors = 1 }; 5088 // Provide pointer to function with destructors for privates. 5089 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 5090 const RecordDecl *KmpCmplrdataUD = 5091 (*FI)->getType()->getAsUnionType()->getDecl(); 5092 if (NeedsCleanup) { 5093 llvm::Value *DestructorFn = emitDestructorsFunction( 5094 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5095 KmpTaskTWithPrivatesQTy); 5096 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 5097 LValue DestructorsLV = CGF.EmitLValueForField( 5098 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 5099 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5100 DestructorFn, KmpRoutineEntryPtrTy), 5101 DestructorsLV); 5102 } 5103 // Set priority. 5104 if (Data.Priority.getInt()) { 5105 LValue Data2LV = CGF.EmitLValueForField( 5106 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 5107 LValue PriorityLV = CGF.EmitLValueForField( 5108 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 5109 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 5110 } 5111 Result.NewTask = NewTask; 5112 Result.TaskEntry = TaskEntry; 5113 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 5114 Result.TDBase = TDBase; 5115 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 5116 return Result; 5117 } 5118 5119 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5120 const OMPExecutableDirective &D, 5121 llvm::Function *TaskFunction, 5122 QualType SharedsTy, Address Shareds, 5123 const Expr *IfCond, 5124 const OMPTaskDataTy &Data) { 5125 if (!CGF.HaveInsertPoint()) 5126 return; 5127 5128 TaskResultTy Result = 5129 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5130 llvm::Value *NewTask = Result.NewTask; 5131 llvm::Function *TaskEntry = Result.TaskEntry; 5132 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5133 LValue TDBase = Result.TDBase; 5134 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5135 ASTContext &C = CGM.getContext(); 5136 // Process list of dependences. 5137 Address DependenciesArray = Address::invalid(); 5138 unsigned NumDependencies = Data.Dependences.size(); 5139 if (NumDependencies) { 5140 // Dependence kind for RTL. 5141 enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 }; 5142 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 5143 RecordDecl *KmpDependInfoRD; 5144 QualType FlagsTy = 5145 C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 5146 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5147 if (KmpDependInfoTy.isNull()) { 5148 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 5149 KmpDependInfoRD->startDefinition(); 5150 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 5151 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 5152 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 5153 KmpDependInfoRD->completeDefinition(); 5154 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 5155 } else { 5156 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5157 } 5158 // Define type kmp_depend_info[<Dependences.size()>]; 5159 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 5160 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), 5161 ArrayType::Normal, /*IndexTypeQuals=*/0); 5162 // kmp_depend_info[<Dependences.size()>] deps; 5163 DependenciesArray = 5164 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 5165 for (unsigned I = 0; I < NumDependencies; ++I) { 5166 const Expr *E = Data.Dependences[I].second; 5167 LValue Addr = CGF.EmitLValue(E); 5168 llvm::Value *Size; 5169 QualType Ty = E->getType(); 5170 if (const auto *ASE = 5171 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 5172 LValue UpAddrLVal = 5173 CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false); 5174 llvm::Value *UpAddr = 5175 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); 5176 llvm::Value *LowIntPtr = 5177 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); 5178 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 5179 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 5180 } else { 5181 Size = CGF.getTypeSize(Ty); 5182 } 5183 LValue Base = CGF.MakeAddrLValue( 5184 CGF.Builder.CreateConstArrayGEP(DependenciesArray, I), 5185 KmpDependInfoTy); 5186 // deps[i].base_addr = &<Dependences[i].second>; 5187 LValue BaseAddrLVal = CGF.EmitLValueForField( 5188 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5189 CGF.EmitStoreOfScalar( 5190 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), 5191 BaseAddrLVal); 5192 // deps[i].len = sizeof(<Dependences[i].second>); 5193 LValue LenLVal = CGF.EmitLValueForField( 5194 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 5195 CGF.EmitStoreOfScalar(Size, LenLVal); 5196 // deps[i].flags = <Dependences[i].first>; 5197 RTLDependenceKindTy DepKind; 5198 switch (Data.Dependences[I].first) { 5199 case OMPC_DEPEND_in: 5200 DepKind = DepIn; 5201 break; 5202 // Out and InOut dependencies must use the same code. 5203 case OMPC_DEPEND_out: 5204 case OMPC_DEPEND_inout: 5205 DepKind = DepInOut; 5206 break; 5207 case OMPC_DEPEND_mutexinoutset: 5208 DepKind = DepMutexInOutSet; 5209 break; 5210 case OMPC_DEPEND_source: 5211 case OMPC_DEPEND_sink: 5212 case OMPC_DEPEND_unknown: 5213 llvm_unreachable("Unknown task dependence type"); 5214 } 5215 LValue FlagsLVal = CGF.EmitLValueForField( 5216 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5217 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5218 FlagsLVal); 5219 } 5220 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5221 CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy); 5222 } 5223 5224 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5225 // libcall. 5226 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5227 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5228 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5229 // list is not empty 5230 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5231 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5232 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5233 llvm::Value *DepTaskArgs[7]; 5234 if (NumDependencies) { 5235 DepTaskArgs[0] = UpLoc; 5236 DepTaskArgs[1] = ThreadID; 5237 DepTaskArgs[2] = NewTask; 5238 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); 5239 DepTaskArgs[4] = DependenciesArray.getPointer(); 5240 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5241 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5242 } 5243 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies, 5244 &TaskArgs, 5245 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5246 if (!Data.Tied) { 5247 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5248 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5249 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5250 } 5251 if (NumDependencies) { 5252 CGF.EmitRuntimeCall( 5253 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); 5254 } else { 5255 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 5256 TaskArgs); 5257 } 5258 // Check if parent region is untied and build return for untied task; 5259 if (auto *Region = 5260 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5261 Region->emitUntiedSwitch(CGF); 5262 }; 5263 5264 llvm::Value *DepWaitTaskArgs[6]; 5265 if (NumDependencies) { 5266 DepWaitTaskArgs[0] = UpLoc; 5267 DepWaitTaskArgs[1] = ThreadID; 5268 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); 5269 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5270 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5271 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5272 } 5273 auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 5274 NumDependencies, &DepWaitTaskArgs, 5275 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5276 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5277 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5278 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5279 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5280 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5281 // is specified. 5282 if (NumDependencies) 5283 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), 5284 DepWaitTaskArgs); 5285 // Call proxy_task_entry(gtid, new_task); 5286 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5287 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5288 Action.Enter(CGF); 5289 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5290 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5291 OutlinedFnArgs); 5292 }; 5293 5294 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5295 // kmp_task_t *new_task); 5296 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5297 // kmp_task_t *new_task); 5298 RegionCodeGenTy RCG(CodeGen); 5299 CommonActionTy Action( 5300 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, 5301 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); 5302 RCG.setAction(Action); 5303 RCG(CGF); 5304 }; 5305 5306 if (IfCond) { 5307 emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5308 } else { 5309 RegionCodeGenTy ThenRCG(ThenCodeGen); 5310 ThenRCG(CGF); 5311 } 5312 } 5313 5314 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5315 const OMPLoopDirective &D, 5316 llvm::Function *TaskFunction, 5317 QualType SharedsTy, Address Shareds, 5318 const Expr *IfCond, 5319 const OMPTaskDataTy &Data) { 5320 if (!CGF.HaveInsertPoint()) 5321 return; 5322 TaskResultTy Result = 5323 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5324 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5325 // libcall. 5326 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5327 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5328 // sched, kmp_uint64 grainsize, void *task_dup); 5329 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5330 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5331 llvm::Value *IfVal; 5332 if (IfCond) { 5333 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5334 /*isSigned=*/true); 5335 } else { 5336 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5337 } 5338 5339 LValue LBLVal = CGF.EmitLValueForField( 5340 Result.TDBase, 5341 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5342 const auto *LBVar = 5343 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5344 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), 5345 /*IsInitializer=*/true); 5346 LValue UBLVal = CGF.EmitLValueForField( 5347 Result.TDBase, 5348 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5349 const auto *UBVar = 5350 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5351 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), 5352 /*IsInitializer=*/true); 5353 LValue StLVal = CGF.EmitLValueForField( 5354 Result.TDBase, 5355 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5356 const auto *StVar = 5357 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5358 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), 5359 /*IsInitializer=*/true); 5360 // Store reductions address. 5361 LValue RedLVal = CGF.EmitLValueForField( 5362 Result.TDBase, 5363 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5364 if (Data.Reductions) { 5365 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5366 } else { 5367 CGF.EmitNullInitialization(RedLVal.getAddress(), 5368 CGF.getContext().VoidPtrTy); 5369 } 5370 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5371 llvm::Value *TaskArgs[] = { 5372 UpLoc, 5373 ThreadID, 5374 Result.NewTask, 5375 IfVal, 5376 LBLVal.getPointer(), 5377 UBLVal.getPointer(), 5378 CGF.EmitLoadOfScalar(StLVal, Loc), 5379 llvm::ConstantInt::getSigned( 5380 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5381 llvm::ConstantInt::getSigned( 5382 CGF.IntTy, Data.Schedule.getPointer() 5383 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5384 : NoSchedule), 5385 Data.Schedule.getPointer() 5386 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5387 /*isSigned=*/false) 5388 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5389 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5390 Result.TaskDupFn, CGF.VoidPtrTy) 5391 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5392 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); 5393 } 5394 5395 /// Emit reduction operation for each element of array (required for 5396 /// array sections) LHS op = RHS. 5397 /// \param Type Type of array. 5398 /// \param LHSVar Variable on the left side of the reduction operation 5399 /// (references element of array in original variable). 5400 /// \param RHSVar Variable on the right side of the reduction operation 5401 /// (references element of array in original variable). 5402 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5403 /// RHSVar. 5404 static void EmitOMPAggregateReduction( 5405 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5406 const VarDecl *RHSVar, 5407 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5408 const Expr *, const Expr *)> &RedOpGen, 5409 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5410 const Expr *UpExpr = nullptr) { 5411 // Perform element-by-element initialization. 5412 QualType ElementTy; 5413 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5414 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5415 5416 // Drill down to the base element type on both arrays. 5417 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5418 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5419 5420 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5421 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5422 // Cast from pointer to array type to pointer to single element. 5423 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5424 // The basic structure here is a while-do loop. 5425 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5426 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5427 llvm::Value *IsEmpty = 5428 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5429 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5430 5431 // Enter the loop body, making that address the current address. 5432 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5433 CGF.EmitBlock(BodyBB); 5434 5435 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5436 5437 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5438 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5439 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5440 Address RHSElementCurrent = 5441 Address(RHSElementPHI, 5442 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5443 5444 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5445 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5446 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5447 Address LHSElementCurrent = 5448 Address(LHSElementPHI, 5449 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5450 5451 // Emit copy. 5452 CodeGenFunction::OMPPrivateScope Scope(CGF); 5453 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5454 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5455 Scope.Privatize(); 5456 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5457 Scope.ForceCleanup(); 5458 5459 // Shift the address forward by one element. 5460 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5461 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5462 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5463 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5464 // Check whether we've reached the end. 5465 llvm::Value *Done = 5466 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5467 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5468 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5469 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5470 5471 // Done. 5472 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5473 } 5474 5475 /// Emit reduction combiner. If the combiner is a simple expression emit it as 5476 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5477 /// UDR combiner function. 5478 static void emitReductionCombiner(CodeGenFunction &CGF, 5479 const Expr *ReductionOp) { 5480 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5481 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5482 if (const auto *DRE = 5483 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5484 if (const auto *DRD = 5485 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5486 std::pair<llvm::Function *, llvm::Function *> Reduction = 5487 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5488 RValue Func = RValue::get(Reduction.first); 5489 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5490 CGF.EmitIgnoredExpr(ReductionOp); 5491 return; 5492 } 5493 CGF.EmitIgnoredExpr(ReductionOp); 5494 } 5495 5496 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5497 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5498 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5499 ArrayRef<const Expr *> ReductionOps) { 5500 ASTContext &C = CGM.getContext(); 5501 5502 // void reduction_func(void *LHSArg, void *RHSArg); 5503 FunctionArgList Args; 5504 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5505 ImplicitParamDecl::Other); 5506 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5507 ImplicitParamDecl::Other); 5508 Args.push_back(&LHSArg); 5509 Args.push_back(&RHSArg); 5510 const auto &CGFI = 5511 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5512 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5513 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5514 llvm::GlobalValue::InternalLinkage, Name, 5515 &CGM.getModule()); 5516 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5517 Fn->setDoesNotRecurse(); 5518 CodeGenFunction CGF(CGM); 5519 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5520 5521 // Dst = (void*[n])(LHSArg); 5522 // Src = (void*[n])(RHSArg); 5523 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5524 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5525 ArgsType), CGF.getPointerAlign()); 5526 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5527 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5528 ArgsType), CGF.getPointerAlign()); 5529 5530 // ... 5531 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5532 // ... 5533 CodeGenFunction::OMPPrivateScope Scope(CGF); 5534 auto IPriv = Privates.begin(); 5535 unsigned Idx = 0; 5536 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5537 const auto *RHSVar = 5538 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5539 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5540 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5541 }); 5542 const auto *LHSVar = 5543 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5544 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5545 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5546 }); 5547 QualType PrivTy = (*IPriv)->getType(); 5548 if (PrivTy->isVariablyModifiedType()) { 5549 // Get array size and emit VLA type. 5550 ++Idx; 5551 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5552 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5553 const VariableArrayType *VLA = 5554 CGF.getContext().getAsVariableArrayType(PrivTy); 5555 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5556 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5557 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5558 CGF.EmitVariablyModifiedType(PrivTy); 5559 } 5560 } 5561 Scope.Privatize(); 5562 IPriv = Privates.begin(); 5563 auto ILHS = LHSExprs.begin(); 5564 auto IRHS = RHSExprs.begin(); 5565 for (const Expr *E : ReductionOps) { 5566 if ((*IPriv)->getType()->isArrayType()) { 5567 // Emit reduction for array section. 5568 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5569 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5570 EmitOMPAggregateReduction( 5571 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5572 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5573 emitReductionCombiner(CGF, E); 5574 }); 5575 } else { 5576 // Emit reduction for array subscript or single variable. 5577 emitReductionCombiner(CGF, E); 5578 } 5579 ++IPriv; 5580 ++ILHS; 5581 ++IRHS; 5582 } 5583 Scope.ForceCleanup(); 5584 CGF.FinishFunction(); 5585 return Fn; 5586 } 5587 5588 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5589 const Expr *ReductionOp, 5590 const Expr *PrivateRef, 5591 const DeclRefExpr *LHS, 5592 const DeclRefExpr *RHS) { 5593 if (PrivateRef->getType()->isArrayType()) { 5594 // Emit reduction for array section. 5595 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5596 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5597 EmitOMPAggregateReduction( 5598 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5599 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5600 emitReductionCombiner(CGF, ReductionOp); 5601 }); 5602 } else { 5603 // Emit reduction for array subscript or single variable. 5604 emitReductionCombiner(CGF, ReductionOp); 5605 } 5606 } 5607 5608 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5609 ArrayRef<const Expr *> Privates, 5610 ArrayRef<const Expr *> LHSExprs, 5611 ArrayRef<const Expr *> RHSExprs, 5612 ArrayRef<const Expr *> ReductionOps, 5613 ReductionOptionsTy Options) { 5614 if (!CGF.HaveInsertPoint()) 5615 return; 5616 5617 bool WithNowait = Options.WithNowait; 5618 bool SimpleReduction = Options.SimpleReduction; 5619 5620 // Next code should be emitted for reduction: 5621 // 5622 // static kmp_critical_name lock = { 0 }; 5623 // 5624 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5625 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5626 // ... 5627 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5628 // *(Type<n>-1*)rhs[<n>-1]); 5629 // } 5630 // 5631 // ... 5632 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5633 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5634 // RedList, reduce_func, &<lock>)) { 5635 // case 1: 5636 // ... 5637 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5638 // ... 5639 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5640 // break; 5641 // case 2: 5642 // ... 5643 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5644 // ... 5645 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5646 // break; 5647 // default:; 5648 // } 5649 // 5650 // if SimpleReduction is true, only the next code is generated: 5651 // ... 5652 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5653 // ... 5654 5655 ASTContext &C = CGM.getContext(); 5656 5657 if (SimpleReduction) { 5658 CodeGenFunction::RunCleanupsScope Scope(CGF); 5659 auto IPriv = Privates.begin(); 5660 auto ILHS = LHSExprs.begin(); 5661 auto IRHS = RHSExprs.begin(); 5662 for (const Expr *E : ReductionOps) { 5663 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5664 cast<DeclRefExpr>(*IRHS)); 5665 ++IPriv; 5666 ++ILHS; 5667 ++IRHS; 5668 } 5669 return; 5670 } 5671 5672 // 1. Build a list of reduction variables. 5673 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5674 auto Size = RHSExprs.size(); 5675 for (const Expr *E : Privates) { 5676 if (E->getType()->isVariablyModifiedType()) 5677 // Reserve place for array size. 5678 ++Size; 5679 } 5680 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5681 QualType ReductionArrayTy = 5682 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 5683 /*IndexTypeQuals=*/0); 5684 Address ReductionList = 5685 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5686 auto IPriv = Privates.begin(); 5687 unsigned Idx = 0; 5688 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5689 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5690 CGF.Builder.CreateStore( 5691 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5692 CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), 5693 Elem); 5694 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5695 // Store array size. 5696 ++Idx; 5697 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5698 llvm::Value *Size = CGF.Builder.CreateIntCast( 5699 CGF.getVLASize( 5700 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5701 .NumElts, 5702 CGF.SizeTy, /*isSigned=*/false); 5703 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5704 Elem); 5705 } 5706 } 5707 5708 // 2. Emit reduce_func(). 5709 llvm::Function *ReductionFn = emitReductionFunction( 5710 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5711 LHSExprs, RHSExprs, ReductionOps); 5712 5713 // 3. Create static kmp_critical_name lock = { 0 }; 5714 std::string Name = getName({"reduction"}); 5715 llvm::Value *Lock = getCriticalRegionLock(Name); 5716 5717 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5718 // RedList, reduce_func, &<lock>); 5719 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5720 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5721 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5722 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5723 ReductionList.getPointer(), CGF.VoidPtrTy); 5724 llvm::Value *Args[] = { 5725 IdentTLoc, // ident_t *<loc> 5726 ThreadId, // i32 <gtid> 5727 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5728 ReductionArrayTySize, // size_type sizeof(RedList) 5729 RL, // void *RedList 5730 ReductionFn, // void (*) (void *, void *) <reduce_func> 5731 Lock // kmp_critical_name *&<lock> 5732 }; 5733 llvm::Value *Res = CGF.EmitRuntimeCall( 5734 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 5735 : OMPRTL__kmpc_reduce), 5736 Args); 5737 5738 // 5. Build switch(res) 5739 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5740 llvm::SwitchInst *SwInst = 5741 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5742 5743 // 6. Build case 1: 5744 // ... 5745 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5746 // ... 5747 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5748 // break; 5749 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5750 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5751 CGF.EmitBlock(Case1BB); 5752 5753 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5754 llvm::Value *EndArgs[] = { 5755 IdentTLoc, // ident_t *<loc> 5756 ThreadId, // i32 <gtid> 5757 Lock // kmp_critical_name *&<lock> 5758 }; 5759 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5760 CodeGenFunction &CGF, PrePostActionTy &Action) { 5761 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5762 auto IPriv = Privates.begin(); 5763 auto ILHS = LHSExprs.begin(); 5764 auto IRHS = RHSExprs.begin(); 5765 for (const Expr *E : ReductionOps) { 5766 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5767 cast<DeclRefExpr>(*IRHS)); 5768 ++IPriv; 5769 ++ILHS; 5770 ++IRHS; 5771 } 5772 }; 5773 RegionCodeGenTy RCG(CodeGen); 5774 CommonActionTy Action( 5775 nullptr, llvm::None, 5776 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait 5777 : OMPRTL__kmpc_end_reduce), 5778 EndArgs); 5779 RCG.setAction(Action); 5780 RCG(CGF); 5781 5782 CGF.EmitBranch(DefaultBB); 5783 5784 // 7. Build case 2: 5785 // ... 5786 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5787 // ... 5788 // break; 5789 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5790 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5791 CGF.EmitBlock(Case2BB); 5792 5793 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5794 CodeGenFunction &CGF, PrePostActionTy &Action) { 5795 auto ILHS = LHSExprs.begin(); 5796 auto IRHS = RHSExprs.begin(); 5797 auto IPriv = Privates.begin(); 5798 for (const Expr *E : ReductionOps) { 5799 const Expr *XExpr = nullptr; 5800 const Expr *EExpr = nullptr; 5801 const Expr *UpExpr = nullptr; 5802 BinaryOperatorKind BO = BO_Comma; 5803 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5804 if (BO->getOpcode() == BO_Assign) { 5805 XExpr = BO->getLHS(); 5806 UpExpr = BO->getRHS(); 5807 } 5808 } 5809 // Try to emit update expression as a simple atomic. 5810 const Expr *RHSExpr = UpExpr; 5811 if (RHSExpr) { 5812 // Analyze RHS part of the whole expression. 5813 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5814 RHSExpr->IgnoreParenImpCasts())) { 5815 // If this is a conditional operator, analyze its condition for 5816 // min/max reduction operator. 5817 RHSExpr = ACO->getCond(); 5818 } 5819 if (const auto *BORHS = 5820 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5821 EExpr = BORHS->getRHS(); 5822 BO = BORHS->getOpcode(); 5823 } 5824 } 5825 if (XExpr) { 5826 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5827 auto &&AtomicRedGen = [BO, VD, 5828 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5829 const Expr *EExpr, const Expr *UpExpr) { 5830 LValue X = CGF.EmitLValue(XExpr); 5831 RValue E; 5832 if (EExpr) 5833 E = CGF.EmitAnyExpr(EExpr); 5834 CGF.EmitOMPAtomicSimpleUpdateExpr( 5835 X, E, BO, /*IsXLHSInRHSPart=*/true, 5836 llvm::AtomicOrdering::Monotonic, Loc, 5837 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5838 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5839 PrivateScope.addPrivate( 5840 VD, [&CGF, VD, XRValue, Loc]() { 5841 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5842 CGF.emitOMPSimpleStore( 5843 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5844 VD->getType().getNonReferenceType(), Loc); 5845 return LHSTemp; 5846 }); 5847 (void)PrivateScope.Privatize(); 5848 return CGF.EmitAnyExpr(UpExpr); 5849 }); 5850 }; 5851 if ((*IPriv)->getType()->isArrayType()) { 5852 // Emit atomic reduction for array section. 5853 const auto *RHSVar = 5854 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5855 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5856 AtomicRedGen, XExpr, EExpr, UpExpr); 5857 } else { 5858 // Emit atomic reduction for array subscript or single variable. 5859 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5860 } 5861 } else { 5862 // Emit as a critical region. 5863 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5864 const Expr *, const Expr *) { 5865 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5866 std::string Name = RT.getName({"atomic_reduction"}); 5867 RT.emitCriticalRegion( 5868 CGF, Name, 5869 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5870 Action.Enter(CGF); 5871 emitReductionCombiner(CGF, E); 5872 }, 5873 Loc); 5874 }; 5875 if ((*IPriv)->getType()->isArrayType()) { 5876 const auto *LHSVar = 5877 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5878 const auto *RHSVar = 5879 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5880 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5881 CritRedGen); 5882 } else { 5883 CritRedGen(CGF, nullptr, nullptr, nullptr); 5884 } 5885 } 5886 ++ILHS; 5887 ++IRHS; 5888 ++IPriv; 5889 } 5890 }; 5891 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5892 if (!WithNowait) { 5893 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5894 llvm::Value *EndArgs[] = { 5895 IdentTLoc, // ident_t *<loc> 5896 ThreadId, // i32 <gtid> 5897 Lock // kmp_critical_name *&<lock> 5898 }; 5899 CommonActionTy Action(nullptr, llvm::None, 5900 createRuntimeFunction(OMPRTL__kmpc_end_reduce), 5901 EndArgs); 5902 AtomicRCG.setAction(Action); 5903 AtomicRCG(CGF); 5904 } else { 5905 AtomicRCG(CGF); 5906 } 5907 5908 CGF.EmitBranch(DefaultBB); 5909 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5910 } 5911 5912 /// Generates unique name for artificial threadprivate variables. 5913 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5914 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5915 const Expr *Ref) { 5916 SmallString<256> Buffer; 5917 llvm::raw_svector_ostream Out(Buffer); 5918 const clang::DeclRefExpr *DE; 5919 const VarDecl *D = ::getBaseDecl(Ref, DE); 5920 if (!D) 5921 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5922 D = D->getCanonicalDecl(); 5923 std::string Name = CGM.getOpenMPRuntime().getName( 5924 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5925 Out << Prefix << Name << "_" 5926 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5927 return Out.str(); 5928 } 5929 5930 /// Emits reduction initializer function: 5931 /// \code 5932 /// void @.red_init(void* %arg) { 5933 /// %0 = bitcast void* %arg to <type>* 5934 /// store <type> <init>, <type>* %0 5935 /// ret void 5936 /// } 5937 /// \endcode 5938 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5939 SourceLocation Loc, 5940 ReductionCodeGen &RCG, unsigned N) { 5941 ASTContext &C = CGM.getContext(); 5942 FunctionArgList Args; 5943 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5944 ImplicitParamDecl::Other); 5945 Args.emplace_back(&Param); 5946 const auto &FnInfo = 5947 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5948 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5949 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5950 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5951 Name, &CGM.getModule()); 5952 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5953 Fn->setDoesNotRecurse(); 5954 CodeGenFunction CGF(CGM); 5955 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5956 Address PrivateAddr = CGF.EmitLoadOfPointer( 5957 CGF.GetAddrOfLocalVar(&Param), 5958 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5959 llvm::Value *Size = nullptr; 5960 // If the size of the reduction item is non-constant, load it from global 5961 // threadprivate variable. 5962 if (RCG.getSizes(N).second) { 5963 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5964 CGF, CGM.getContext().getSizeType(), 5965 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5966 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5967 CGM.getContext().getSizeType(), Loc); 5968 } 5969 RCG.emitAggregateType(CGF, N, Size); 5970 LValue SharedLVal; 5971 // If initializer uses initializer from declare reduction construct, emit a 5972 // pointer to the address of the original reduction item (reuired by reduction 5973 // initializer) 5974 if (RCG.usesReductionInitializer(N)) { 5975 Address SharedAddr = 5976 CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5977 CGF, CGM.getContext().VoidPtrTy, 5978 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 5979 SharedAddr = CGF.EmitLoadOfPointer( 5980 SharedAddr, 5981 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 5982 SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 5983 } else { 5984 SharedLVal = CGF.MakeNaturalAlignAddrLValue( 5985 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 5986 CGM.getContext().VoidPtrTy); 5987 } 5988 // Emit the initializer: 5989 // %0 = bitcast void* %arg to <type>* 5990 // store <type> <init>, <type>* %0 5991 RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal, 5992 [](CodeGenFunction &) { return false; }); 5993 CGF.FinishFunction(); 5994 return Fn; 5995 } 5996 5997 /// Emits reduction combiner function: 5998 /// \code 5999 /// void @.red_comb(void* %arg0, void* %arg1) { 6000 /// %lhs = bitcast void* %arg0 to <type>* 6001 /// %rhs = bitcast void* %arg1 to <type>* 6002 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 6003 /// store <type> %2, <type>* %lhs 6004 /// ret void 6005 /// } 6006 /// \endcode 6007 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 6008 SourceLocation Loc, 6009 ReductionCodeGen &RCG, unsigned N, 6010 const Expr *ReductionOp, 6011 const Expr *LHS, const Expr *RHS, 6012 const Expr *PrivateRef) { 6013 ASTContext &C = CGM.getContext(); 6014 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 6015 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 6016 FunctionArgList Args; 6017 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 6018 C.VoidPtrTy, ImplicitParamDecl::Other); 6019 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6020 ImplicitParamDecl::Other); 6021 Args.emplace_back(&ParamInOut); 6022 Args.emplace_back(&ParamIn); 6023 const auto &FnInfo = 6024 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6025 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6026 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 6027 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6028 Name, &CGM.getModule()); 6029 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6030 Fn->setDoesNotRecurse(); 6031 CodeGenFunction CGF(CGM); 6032 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6033 llvm::Value *Size = nullptr; 6034 // If the size of the reduction item is non-constant, load it from global 6035 // threadprivate variable. 6036 if (RCG.getSizes(N).second) { 6037 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6038 CGF, CGM.getContext().getSizeType(), 6039 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6040 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6041 CGM.getContext().getSizeType(), Loc); 6042 } 6043 RCG.emitAggregateType(CGF, N, Size); 6044 // Remap lhs and rhs variables to the addresses of the function arguments. 6045 // %lhs = bitcast void* %arg0 to <type>* 6046 // %rhs = bitcast void* %arg1 to <type>* 6047 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 6048 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 6049 // Pull out the pointer to the variable. 6050 Address PtrAddr = CGF.EmitLoadOfPointer( 6051 CGF.GetAddrOfLocalVar(&ParamInOut), 6052 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6053 return CGF.Builder.CreateElementBitCast( 6054 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 6055 }); 6056 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 6057 // Pull out the pointer to the variable. 6058 Address PtrAddr = CGF.EmitLoadOfPointer( 6059 CGF.GetAddrOfLocalVar(&ParamIn), 6060 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6061 return CGF.Builder.CreateElementBitCast( 6062 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 6063 }); 6064 PrivateScope.Privatize(); 6065 // Emit the combiner body: 6066 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 6067 // store <type> %2, <type>* %lhs 6068 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 6069 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 6070 cast<DeclRefExpr>(RHS)); 6071 CGF.FinishFunction(); 6072 return Fn; 6073 } 6074 6075 /// Emits reduction finalizer function: 6076 /// \code 6077 /// void @.red_fini(void* %arg) { 6078 /// %0 = bitcast void* %arg to <type>* 6079 /// <destroy>(<type>* %0) 6080 /// ret void 6081 /// } 6082 /// \endcode 6083 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 6084 SourceLocation Loc, 6085 ReductionCodeGen &RCG, unsigned N) { 6086 if (!RCG.needCleanups(N)) 6087 return nullptr; 6088 ASTContext &C = CGM.getContext(); 6089 FunctionArgList Args; 6090 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6091 ImplicitParamDecl::Other); 6092 Args.emplace_back(&Param); 6093 const auto &FnInfo = 6094 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6095 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6096 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6097 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6098 Name, &CGM.getModule()); 6099 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6100 Fn->setDoesNotRecurse(); 6101 CodeGenFunction CGF(CGM); 6102 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6103 Address PrivateAddr = CGF.EmitLoadOfPointer( 6104 CGF.GetAddrOfLocalVar(&Param), 6105 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6106 llvm::Value *Size = nullptr; 6107 // If the size of the reduction item is non-constant, load it from global 6108 // threadprivate variable. 6109 if (RCG.getSizes(N).second) { 6110 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6111 CGF, CGM.getContext().getSizeType(), 6112 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6113 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6114 CGM.getContext().getSizeType(), Loc); 6115 } 6116 RCG.emitAggregateType(CGF, N, Size); 6117 // Emit the finalizer body: 6118 // <destroy>(<type>* %0) 6119 RCG.emitCleanups(CGF, N, PrivateAddr); 6120 CGF.FinishFunction(); 6121 return Fn; 6122 } 6123 6124 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6125 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6126 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6127 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6128 return nullptr; 6129 6130 // Build typedef struct: 6131 // kmp_task_red_input { 6132 // void *reduce_shar; // shared reduction item 6133 // size_t reduce_size; // size of data item 6134 // void *reduce_init; // data initialization routine 6135 // void *reduce_fini; // data finalization routine 6136 // void *reduce_comb; // data combiner routine 6137 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6138 // } kmp_task_red_input_t; 6139 ASTContext &C = CGM.getContext(); 6140 RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t"); 6141 RD->startDefinition(); 6142 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6143 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6144 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6145 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6146 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6147 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6148 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6149 RD->completeDefinition(); 6150 QualType RDType = C.getRecordType(RD); 6151 unsigned Size = Data.ReductionVars.size(); 6152 llvm::APInt ArraySize(/*numBits=*/64, Size); 6153 QualType ArrayRDType = C.getConstantArrayType( 6154 RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); 6155 // kmp_task_red_input_t .rd_input.[Size]; 6156 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6157 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, 6158 Data.ReductionOps); 6159 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6160 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6161 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6162 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6163 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6164 TaskRedInput.getPointer(), Idxs, 6165 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6166 ".rd_input.gep."); 6167 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6168 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6169 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6170 RCG.emitSharedLValue(CGF, Cnt); 6171 llvm::Value *CastedShared = 6172 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer()); 6173 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6174 RCG.emitAggregateType(CGF, Cnt); 6175 llvm::Value *SizeValInChars; 6176 llvm::Value *SizeVal; 6177 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6178 // We use delayed creation/initialization for VLAs, array sections and 6179 // custom reduction initializations. It is required because runtime does not 6180 // provide the way to pass the sizes of VLAs/array sections to 6181 // initializer/combiner/finalizer functions and does not pass the pointer to 6182 // original reduction item to the initializer. Instead threadprivate global 6183 // variables are used to store these values and use them in the functions. 6184 bool DelayedCreation = !!SizeVal; 6185 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6186 /*isSigned=*/false); 6187 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6188 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6189 // ElemLVal.reduce_init = init; 6190 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6191 llvm::Value *InitAddr = 6192 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6193 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6194 DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt); 6195 // ElemLVal.reduce_fini = fini; 6196 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6197 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6198 llvm::Value *FiniAddr = Fini 6199 ? CGF.EmitCastToVoidPtr(Fini) 6200 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 6201 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 6202 // ElemLVal.reduce_comb = comb; 6203 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 6204 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 6205 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 6206 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 6207 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 6208 // ElemLVal.flags = 0; 6209 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 6210 if (DelayedCreation) { 6211 CGF.EmitStoreOfScalar( 6212 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true), 6213 FlagsLVal); 6214 } else 6215 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType()); 6216 } 6217 // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void 6218 // *data); 6219 llvm::Value *Args[] = { 6220 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6221 /*isSigned=*/true), 6222 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6223 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 6224 CGM.VoidPtrTy)}; 6225 return CGF.EmitRuntimeCall( 6226 createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args); 6227 } 6228 6229 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 6230 SourceLocation Loc, 6231 ReductionCodeGen &RCG, 6232 unsigned N) { 6233 auto Sizes = RCG.getSizes(N); 6234 // Emit threadprivate global variable if the type is non-constant 6235 // (Sizes.second = nullptr). 6236 if (Sizes.second) { 6237 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6238 /*isSigned=*/false); 6239 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6240 CGF, CGM.getContext().getSizeType(), 6241 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6242 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6243 } 6244 // Store address of the original reduction item if custom initializer is used. 6245 if (RCG.usesReductionInitializer(N)) { 6246 Address SharedAddr = getAddrOfArtificialThreadPrivate( 6247 CGF, CGM.getContext().VoidPtrTy, 6248 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 6249 CGF.Builder.CreateStore( 6250 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6251 RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy), 6252 SharedAddr, /*IsVolatile=*/false); 6253 } 6254 } 6255 6256 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6257 SourceLocation Loc, 6258 llvm::Value *ReductionsPtr, 6259 LValue SharedLVal) { 6260 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6261 // *d); 6262 llvm::Value *Args[] = { 6263 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6264 /*isSigned=*/true), 6265 ReductionsPtr, 6266 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(), 6267 CGM.VoidPtrTy)}; 6268 return Address( 6269 CGF.EmitRuntimeCall( 6270 createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), 6271 SharedLVal.getAlignment()); 6272 } 6273 6274 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6275 SourceLocation Loc) { 6276 if (!CGF.HaveInsertPoint()) 6277 return; 6278 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6279 // global_tid); 6280 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 6281 // Ignore return result until untied tasks are supported. 6282 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 6283 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6284 Region->emitUntiedSwitch(CGF); 6285 } 6286 6287 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6288 OpenMPDirectiveKind InnerKind, 6289 const RegionCodeGenTy &CodeGen, 6290 bool HasCancel) { 6291 if (!CGF.HaveInsertPoint()) 6292 return; 6293 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 6294 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6295 } 6296 6297 namespace { 6298 enum RTCancelKind { 6299 CancelNoreq = 0, 6300 CancelParallel = 1, 6301 CancelLoop = 2, 6302 CancelSections = 3, 6303 CancelTaskgroup = 4 6304 }; 6305 } // anonymous namespace 6306 6307 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6308 RTCancelKind CancelKind = CancelNoreq; 6309 if (CancelRegion == OMPD_parallel) 6310 CancelKind = CancelParallel; 6311 else if (CancelRegion == OMPD_for) 6312 CancelKind = CancelLoop; 6313 else if (CancelRegion == OMPD_sections) 6314 CancelKind = CancelSections; 6315 else { 6316 assert(CancelRegion == OMPD_taskgroup); 6317 CancelKind = CancelTaskgroup; 6318 } 6319 return CancelKind; 6320 } 6321 6322 void CGOpenMPRuntime::emitCancellationPointCall( 6323 CodeGenFunction &CGF, SourceLocation Loc, 6324 OpenMPDirectiveKind CancelRegion) { 6325 if (!CGF.HaveInsertPoint()) 6326 return; 6327 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6328 // global_tid, kmp_int32 cncl_kind); 6329 if (auto *OMPRegionInfo = 6330 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6331 // For 'cancellation point taskgroup', the task region info may not have a 6332 // cancel. This may instead happen in another adjacent task. 6333 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6334 llvm::Value *Args[] = { 6335 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6336 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6337 // Ignore return result until untied tasks are supported. 6338 llvm::Value *Result = CGF.EmitRuntimeCall( 6339 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 6340 // if (__kmpc_cancellationpoint()) { 6341 // exit from construct; 6342 // } 6343 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6344 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6345 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6346 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6347 CGF.EmitBlock(ExitBB); 6348 // exit from construct; 6349 CodeGenFunction::JumpDest CancelDest = 6350 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6351 CGF.EmitBranchThroughCleanup(CancelDest); 6352 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6353 } 6354 } 6355 } 6356 6357 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6358 const Expr *IfCond, 6359 OpenMPDirectiveKind CancelRegion) { 6360 if (!CGF.HaveInsertPoint()) 6361 return; 6362 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6363 // kmp_int32 cncl_kind); 6364 if (auto *OMPRegionInfo = 6365 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6366 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 6367 PrePostActionTy &) { 6368 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6369 llvm::Value *Args[] = { 6370 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6371 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6372 // Ignore return result until untied tasks are supported. 6373 llvm::Value *Result = CGF.EmitRuntimeCall( 6374 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 6375 // if (__kmpc_cancel()) { 6376 // exit from construct; 6377 // } 6378 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6379 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6380 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6381 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6382 CGF.EmitBlock(ExitBB); 6383 // exit from construct; 6384 CodeGenFunction::JumpDest CancelDest = 6385 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6386 CGF.EmitBranchThroughCleanup(CancelDest); 6387 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6388 }; 6389 if (IfCond) { 6390 emitOMPIfClause(CGF, IfCond, ThenGen, 6391 [](CodeGenFunction &, PrePostActionTy &) {}); 6392 } else { 6393 RegionCodeGenTy ThenRCG(ThenGen); 6394 ThenRCG(CGF); 6395 } 6396 } 6397 } 6398 6399 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6400 const OMPExecutableDirective &D, StringRef ParentName, 6401 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6402 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6403 assert(!ParentName.empty() && "Invalid target region parent name!"); 6404 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6405 IsOffloadEntry, CodeGen); 6406 } 6407 6408 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6409 const OMPExecutableDirective &D, StringRef ParentName, 6410 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6411 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6412 // Create a unique name for the entry function using the source location 6413 // information of the current target region. The name will be something like: 6414 // 6415 // __omp_offloading_DD_FFFF_PP_lBB 6416 // 6417 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6418 // mangled name of the function that encloses the target region and BB is the 6419 // line number of the target region. 6420 6421 unsigned DeviceID; 6422 unsigned FileID; 6423 unsigned Line; 6424 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6425 Line); 6426 SmallString<64> EntryFnName; 6427 { 6428 llvm::raw_svector_ostream OS(EntryFnName); 6429 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6430 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6431 } 6432 6433 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6434 6435 CodeGenFunction CGF(CGM, true); 6436 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6437 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6438 6439 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); 6440 6441 // If this target outline function is not an offload entry, we don't need to 6442 // register it. 6443 if (!IsOffloadEntry) 6444 return; 6445 6446 // The target region ID is used by the runtime library to identify the current 6447 // target region, so it only has to be unique and not necessarily point to 6448 // anything. It could be the pointer to the outlined function that implements 6449 // the target region, but we aren't using that so that the compiler doesn't 6450 // need to keep that, and could therefore inline the host function if proven 6451 // worthwhile during optimization. In the other hand, if emitting code for the 6452 // device, the ID has to be the function address so that it can retrieved from 6453 // the offloading entry and launched by the runtime library. We also mark the 6454 // outlined function to have external linkage in case we are emitting code for 6455 // the device, because these functions will be entry points to the device. 6456 6457 if (CGM.getLangOpts().OpenMPIsDevice) { 6458 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6459 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6460 OutlinedFn->setDSOLocal(false); 6461 } else { 6462 std::string Name = getName({EntryFnName, "region_id"}); 6463 OutlinedFnID = new llvm::GlobalVariable( 6464 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6465 llvm::GlobalValue::WeakAnyLinkage, 6466 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6467 } 6468 6469 // Register the information for the entry associated with this target region. 6470 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6471 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6472 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6473 } 6474 6475 /// Checks if the expression is constant or does not have non-trivial function 6476 /// calls. 6477 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6478 // We can skip constant expressions. 6479 // We can skip expressions with trivial calls or simple expressions. 6480 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6481 !E->hasNonTrivialCall(Ctx)) && 6482 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6483 } 6484 6485 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6486 const Stmt *Body) { 6487 const Stmt *Child = Body->IgnoreContainers(); 6488 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6489 Child = nullptr; 6490 for (const Stmt *S : C->body()) { 6491 if (const auto *E = dyn_cast<Expr>(S)) { 6492 if (isTrivial(Ctx, E)) 6493 continue; 6494 } 6495 // Some of the statements can be ignored. 6496 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6497 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6498 continue; 6499 // Analyze declarations. 6500 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6501 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6502 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6503 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6504 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6505 isa<UsingDirectiveDecl>(D) || 6506 isa<OMPDeclareReductionDecl>(D) || 6507 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6508 return true; 6509 const auto *VD = dyn_cast<VarDecl>(D); 6510 if (!VD) 6511 return false; 6512 return VD->isConstexpr() || 6513 ((VD->getType().isTrivialType(Ctx) || 6514 VD->getType()->isReferenceType()) && 6515 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6516 })) 6517 continue; 6518 } 6519 // Found multiple children - cannot get the one child only. 6520 if (Child) 6521 return nullptr; 6522 Child = S; 6523 } 6524 if (Child) 6525 Child = Child->IgnoreContainers(); 6526 } 6527 return Child; 6528 } 6529 6530 /// Emit the number of teams for a target directive. Inspect the num_teams 6531 /// clause associated with a teams construct combined or closely nested 6532 /// with the target directive. 6533 /// 6534 /// Emit a team of size one for directives such as 'target parallel' that 6535 /// have no associated teams construct. 6536 /// 6537 /// Otherwise, return nullptr. 6538 static llvm::Value * 6539 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6540 const OMPExecutableDirective &D) { 6541 assert(!CGF.getLangOpts().OpenMPIsDevice && 6542 "Clauses associated with the teams directive expected to be emitted " 6543 "only for the host!"); 6544 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6545 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6546 "Expected target-based executable directive."); 6547 CGBuilderTy &Bld = CGF.Builder; 6548 switch (DirectiveKind) { 6549 case OMPD_target: { 6550 const auto *CS = D.getInnermostCapturedStmt(); 6551 const auto *Body = 6552 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6553 const Stmt *ChildStmt = 6554 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6555 if (const auto *NestedDir = 6556 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6557 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6558 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6559 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6560 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6561 const Expr *NumTeams = 6562 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6563 llvm::Value *NumTeamsVal = 6564 CGF.EmitScalarExpr(NumTeams, 6565 /*IgnoreResultAssign*/ true); 6566 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6567 /*IsSigned=*/true); 6568 } 6569 return Bld.getInt32(0); 6570 } 6571 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6572 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6573 return Bld.getInt32(1); 6574 return Bld.getInt32(0); 6575 } 6576 return nullptr; 6577 } 6578 case OMPD_target_teams: 6579 case OMPD_target_teams_distribute: 6580 case OMPD_target_teams_distribute_simd: 6581 case OMPD_target_teams_distribute_parallel_for: 6582 case OMPD_target_teams_distribute_parallel_for_simd: { 6583 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6584 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 6585 const Expr *NumTeams = 6586 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6587 llvm::Value *NumTeamsVal = 6588 CGF.EmitScalarExpr(NumTeams, 6589 /*IgnoreResultAssign*/ true); 6590 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6591 /*IsSigned=*/true); 6592 } 6593 return Bld.getInt32(0); 6594 } 6595 case OMPD_target_parallel: 6596 case OMPD_target_parallel_for: 6597 case OMPD_target_parallel_for_simd: 6598 case OMPD_target_simd: 6599 return Bld.getInt32(1); 6600 case OMPD_parallel: 6601 case OMPD_for: 6602 case OMPD_parallel_for: 6603 case OMPD_parallel_sections: 6604 case OMPD_for_simd: 6605 case OMPD_parallel_for_simd: 6606 case OMPD_cancel: 6607 case OMPD_cancellation_point: 6608 case OMPD_ordered: 6609 case OMPD_threadprivate: 6610 case OMPD_allocate: 6611 case OMPD_task: 6612 case OMPD_simd: 6613 case OMPD_sections: 6614 case OMPD_section: 6615 case OMPD_single: 6616 case OMPD_master: 6617 case OMPD_critical: 6618 case OMPD_taskyield: 6619 case OMPD_barrier: 6620 case OMPD_taskwait: 6621 case OMPD_taskgroup: 6622 case OMPD_atomic: 6623 case OMPD_flush: 6624 case OMPD_teams: 6625 case OMPD_target_data: 6626 case OMPD_target_exit_data: 6627 case OMPD_target_enter_data: 6628 case OMPD_distribute: 6629 case OMPD_distribute_simd: 6630 case OMPD_distribute_parallel_for: 6631 case OMPD_distribute_parallel_for_simd: 6632 case OMPD_teams_distribute: 6633 case OMPD_teams_distribute_simd: 6634 case OMPD_teams_distribute_parallel_for: 6635 case OMPD_teams_distribute_parallel_for_simd: 6636 case OMPD_target_update: 6637 case OMPD_declare_simd: 6638 case OMPD_declare_target: 6639 case OMPD_end_declare_target: 6640 case OMPD_declare_reduction: 6641 case OMPD_declare_mapper: 6642 case OMPD_taskloop: 6643 case OMPD_taskloop_simd: 6644 case OMPD_requires: 6645 case OMPD_unknown: 6646 break; 6647 } 6648 llvm_unreachable("Unexpected directive kind."); 6649 } 6650 6651 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6652 llvm::Value *DefaultThreadLimitVal) { 6653 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6654 CGF.getContext(), CS->getCapturedStmt()); 6655 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6656 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6657 llvm::Value *NumThreads = nullptr; 6658 llvm::Value *CondVal = nullptr; 6659 // Handle if clause. If if clause present, the number of threads is 6660 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6661 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6662 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6663 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6664 const OMPIfClause *IfClause = nullptr; 6665 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6666 if (C->getNameModifier() == OMPD_unknown || 6667 C->getNameModifier() == OMPD_parallel) { 6668 IfClause = C; 6669 break; 6670 } 6671 } 6672 if (IfClause) { 6673 const Expr *Cond = IfClause->getCondition(); 6674 bool Result; 6675 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6676 if (!Result) 6677 return CGF.Builder.getInt32(1); 6678 } else { 6679 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6680 if (const auto *PreInit = 6681 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6682 for (const auto *I : PreInit->decls()) { 6683 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6684 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6685 } else { 6686 CodeGenFunction::AutoVarEmission Emission = 6687 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6688 CGF.EmitAutoVarCleanups(Emission); 6689 } 6690 } 6691 } 6692 CondVal = CGF.EvaluateExprAsBool(Cond); 6693 } 6694 } 6695 } 6696 // Check the value of num_threads clause iff if clause was not specified 6697 // or is not evaluated to false. 6698 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6699 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6700 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6701 const auto *NumThreadsClause = 6702 Dir->getSingleClause<OMPNumThreadsClause>(); 6703 CodeGenFunction::LexicalScope Scope( 6704 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6705 if (const auto *PreInit = 6706 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6707 for (const auto *I : PreInit->decls()) { 6708 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6709 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6710 } else { 6711 CodeGenFunction::AutoVarEmission Emission = 6712 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6713 CGF.EmitAutoVarCleanups(Emission); 6714 } 6715 } 6716 } 6717 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6718 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6719 /*IsSigned=*/false); 6720 if (DefaultThreadLimitVal) 6721 NumThreads = CGF.Builder.CreateSelect( 6722 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6723 DefaultThreadLimitVal, NumThreads); 6724 } else { 6725 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6726 : CGF.Builder.getInt32(0); 6727 } 6728 // Process condition of the if clause. 6729 if (CondVal) { 6730 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6731 CGF.Builder.getInt32(1)); 6732 } 6733 return NumThreads; 6734 } 6735 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6736 return CGF.Builder.getInt32(1); 6737 return DefaultThreadLimitVal; 6738 } 6739 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6740 : CGF.Builder.getInt32(0); 6741 } 6742 6743 /// Emit the number of threads for a target directive. Inspect the 6744 /// thread_limit clause associated with a teams construct combined or closely 6745 /// nested with the target directive. 6746 /// 6747 /// Emit the num_threads clause for directives such as 'target parallel' that 6748 /// have no associated teams construct. 6749 /// 6750 /// Otherwise, return nullptr. 6751 static llvm::Value * 6752 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 6753 const OMPExecutableDirective &D) { 6754 assert(!CGF.getLangOpts().OpenMPIsDevice && 6755 "Clauses associated with the teams directive expected to be emitted " 6756 "only for the host!"); 6757 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6758 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6759 "Expected target-based executable directive."); 6760 CGBuilderTy &Bld = CGF.Builder; 6761 llvm::Value *ThreadLimitVal = nullptr; 6762 llvm::Value *NumThreadsVal = nullptr; 6763 switch (DirectiveKind) { 6764 case OMPD_target: { 6765 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6766 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6767 return NumThreads; 6768 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6769 CGF.getContext(), CS->getCapturedStmt()); 6770 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6771 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6772 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6773 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6774 const auto *ThreadLimitClause = 6775 Dir->getSingleClause<OMPThreadLimitClause>(); 6776 CodeGenFunction::LexicalScope Scope( 6777 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6778 if (const auto *PreInit = 6779 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6780 for (const auto *I : PreInit->decls()) { 6781 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6782 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6783 } else { 6784 CodeGenFunction::AutoVarEmission Emission = 6785 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6786 CGF.EmitAutoVarCleanups(Emission); 6787 } 6788 } 6789 } 6790 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6791 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6792 ThreadLimitVal = 6793 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false); 6794 } 6795 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6796 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6797 CS = Dir->getInnermostCapturedStmt(); 6798 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6799 CGF.getContext(), CS->getCapturedStmt()); 6800 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6801 } 6802 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6803 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6804 CS = Dir->getInnermostCapturedStmt(); 6805 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6806 return NumThreads; 6807 } 6808 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6809 return Bld.getInt32(1); 6810 } 6811 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 6812 } 6813 case OMPD_target_teams: { 6814 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6815 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6816 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6817 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6818 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6819 ThreadLimitVal = 6820 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false); 6821 } 6822 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6823 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6824 return NumThreads; 6825 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6826 CGF.getContext(), CS->getCapturedStmt()); 6827 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6828 if (Dir->getDirectiveKind() == OMPD_distribute) { 6829 CS = Dir->getInnermostCapturedStmt(); 6830 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6831 return NumThreads; 6832 } 6833 } 6834 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 6835 } 6836 case OMPD_target_teams_distribute: 6837 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6838 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6839 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6840 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6841 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6842 ThreadLimitVal = 6843 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false); 6844 } 6845 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 6846 case OMPD_target_parallel: 6847 case OMPD_target_parallel_for: 6848 case OMPD_target_parallel_for_simd: 6849 case OMPD_target_teams_distribute_parallel_for: 6850 case OMPD_target_teams_distribute_parallel_for_simd: { 6851 llvm::Value *CondVal = nullptr; 6852 // Handle if clause. If if clause present, the number of threads is 6853 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6854 if (D.hasClausesOfKind<OMPIfClause>()) { 6855 const OMPIfClause *IfClause = nullptr; 6856 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 6857 if (C->getNameModifier() == OMPD_unknown || 6858 C->getNameModifier() == OMPD_parallel) { 6859 IfClause = C; 6860 break; 6861 } 6862 } 6863 if (IfClause) { 6864 const Expr *Cond = IfClause->getCondition(); 6865 bool Result; 6866 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6867 if (!Result) 6868 return Bld.getInt32(1); 6869 } else { 6870 CodeGenFunction::RunCleanupsScope Scope(CGF); 6871 CondVal = CGF.EvaluateExprAsBool(Cond); 6872 } 6873 } 6874 } 6875 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6876 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6877 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6878 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6879 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6880 ThreadLimitVal = 6881 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false); 6882 } 6883 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6884 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6885 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6886 llvm::Value *NumThreads = CGF.EmitScalarExpr( 6887 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 6888 NumThreadsVal = 6889 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/false); 6890 ThreadLimitVal = ThreadLimitVal 6891 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 6892 ThreadLimitVal), 6893 NumThreadsVal, ThreadLimitVal) 6894 : NumThreadsVal; 6895 } 6896 if (!ThreadLimitVal) 6897 ThreadLimitVal = Bld.getInt32(0); 6898 if (CondVal) 6899 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 6900 return ThreadLimitVal; 6901 } 6902 case OMPD_target_teams_distribute_simd: 6903 case OMPD_target_simd: 6904 return Bld.getInt32(1); 6905 case OMPD_parallel: 6906 case OMPD_for: 6907 case OMPD_parallel_for: 6908 case OMPD_parallel_sections: 6909 case OMPD_for_simd: 6910 case OMPD_parallel_for_simd: 6911 case OMPD_cancel: 6912 case OMPD_cancellation_point: 6913 case OMPD_ordered: 6914 case OMPD_threadprivate: 6915 case OMPD_allocate: 6916 case OMPD_task: 6917 case OMPD_simd: 6918 case OMPD_sections: 6919 case OMPD_section: 6920 case OMPD_single: 6921 case OMPD_master: 6922 case OMPD_critical: 6923 case OMPD_taskyield: 6924 case OMPD_barrier: 6925 case OMPD_taskwait: 6926 case OMPD_taskgroup: 6927 case OMPD_atomic: 6928 case OMPD_flush: 6929 case OMPD_teams: 6930 case OMPD_target_data: 6931 case OMPD_target_exit_data: 6932 case OMPD_target_enter_data: 6933 case OMPD_distribute: 6934 case OMPD_distribute_simd: 6935 case OMPD_distribute_parallel_for: 6936 case OMPD_distribute_parallel_for_simd: 6937 case OMPD_teams_distribute: 6938 case OMPD_teams_distribute_simd: 6939 case OMPD_teams_distribute_parallel_for: 6940 case OMPD_teams_distribute_parallel_for_simd: 6941 case OMPD_target_update: 6942 case OMPD_declare_simd: 6943 case OMPD_declare_target: 6944 case OMPD_end_declare_target: 6945 case OMPD_declare_reduction: 6946 case OMPD_declare_mapper: 6947 case OMPD_taskloop: 6948 case OMPD_taskloop_simd: 6949 case OMPD_requires: 6950 case OMPD_unknown: 6951 break; 6952 } 6953 llvm_unreachable("Unsupported directive kind."); 6954 } 6955 6956 namespace { 6957 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 6958 6959 // Utility to handle information from clauses associated with a given 6960 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 6961 // It provides a convenient interface to obtain the information and generate 6962 // code for that information. 6963 class MappableExprsHandler { 6964 public: 6965 /// Values for bit flags used to specify the mapping type for 6966 /// offloading. 6967 enum OpenMPOffloadMappingFlags : uint64_t { 6968 /// No flags 6969 OMP_MAP_NONE = 0x0, 6970 /// Allocate memory on the device and move data from host to device. 6971 OMP_MAP_TO = 0x01, 6972 /// Allocate memory on the device and move data from device to host. 6973 OMP_MAP_FROM = 0x02, 6974 /// Always perform the requested mapping action on the element, even 6975 /// if it was already mapped before. 6976 OMP_MAP_ALWAYS = 0x04, 6977 /// Delete the element from the device environment, ignoring the 6978 /// current reference count associated with the element. 6979 OMP_MAP_DELETE = 0x08, 6980 /// The element being mapped is a pointer-pointee pair; both the 6981 /// pointer and the pointee should be mapped. 6982 OMP_MAP_PTR_AND_OBJ = 0x10, 6983 /// This flags signals that the base address of an entry should be 6984 /// passed to the target kernel as an argument. 6985 OMP_MAP_TARGET_PARAM = 0x20, 6986 /// Signal that the runtime library has to return the device pointer 6987 /// in the current position for the data being mapped. Used when we have the 6988 /// use_device_ptr clause. 6989 OMP_MAP_RETURN_PARAM = 0x40, 6990 /// This flag signals that the reference being passed is a pointer to 6991 /// private data. 6992 OMP_MAP_PRIVATE = 0x80, 6993 /// Pass the element to the device by value. 6994 OMP_MAP_LITERAL = 0x100, 6995 /// Implicit map 6996 OMP_MAP_IMPLICIT = 0x200, 6997 /// The 16 MSBs of the flags indicate whether the entry is member of some 6998 /// struct/class. 6999 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7000 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7001 }; 7002 7003 /// Class that associates information with a base pointer to be passed to the 7004 /// runtime library. 7005 class BasePointerInfo { 7006 /// The base pointer. 7007 llvm::Value *Ptr = nullptr; 7008 /// The base declaration that refers to this device pointer, or null if 7009 /// there is none. 7010 const ValueDecl *DevPtrDecl = nullptr; 7011 7012 public: 7013 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7014 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7015 llvm::Value *operator*() const { return Ptr; } 7016 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7017 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7018 }; 7019 7020 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7021 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7022 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7023 7024 /// Map between a struct and the its lowest & highest elements which have been 7025 /// mapped. 7026 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7027 /// HE(FieldIndex, Pointer)} 7028 struct StructRangeInfoTy { 7029 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7030 0, Address::invalid()}; 7031 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7032 0, Address::invalid()}; 7033 Address Base = Address::invalid(); 7034 }; 7035 7036 private: 7037 /// Kind that defines how a device pointer has to be returned. 7038 struct MapInfo { 7039 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7040 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7041 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7042 bool ReturnDevicePointer = false; 7043 bool IsImplicit = false; 7044 7045 MapInfo() = default; 7046 MapInfo( 7047 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7048 OpenMPMapClauseKind MapType, 7049 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7050 bool ReturnDevicePointer, bool IsImplicit) 7051 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7052 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} 7053 }; 7054 7055 /// If use_device_ptr is used on a pointer which is a struct member and there 7056 /// is no map information about it, then emission of that entry is deferred 7057 /// until the whole struct has been processed. 7058 struct DeferredDevicePtrEntryTy { 7059 const Expr *IE = nullptr; 7060 const ValueDecl *VD = nullptr; 7061 7062 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD) 7063 : IE(IE), VD(VD) {} 7064 }; 7065 7066 /// Directive from where the map clauses were extracted. 7067 const OMPExecutableDirective &CurDir; 7068 7069 /// Function the directive is being generated for. 7070 CodeGenFunction &CGF; 7071 7072 /// Set of all first private variables in the current directive. 7073 llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls; 7074 7075 /// Map between device pointer declarations and their expression components. 7076 /// The key value for declarations in 'this' is null. 7077 llvm::DenseMap< 7078 const ValueDecl *, 7079 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7080 DevPointersMap; 7081 7082 llvm::Value *getExprTypeSize(const Expr *E) const { 7083 QualType ExprTy = E->getType().getCanonicalType(); 7084 7085 // Reference types are ignored for mapping purposes. 7086 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7087 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7088 7089 // Given that an array section is considered a built-in type, we need to 7090 // do the calculation based on the length of the section instead of relying 7091 // on CGF.getTypeSize(E->getType()). 7092 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7093 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7094 OAE->getBase()->IgnoreParenImpCasts()) 7095 .getCanonicalType(); 7096 7097 // If there is no length associated with the expression, that means we 7098 // are using the whole length of the base. 7099 if (!OAE->getLength() && OAE->getColonLoc().isValid()) 7100 return CGF.getTypeSize(BaseTy); 7101 7102 llvm::Value *ElemSize; 7103 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7104 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7105 } else { 7106 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7107 assert(ATy && "Expecting array type if not a pointer type."); 7108 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7109 } 7110 7111 // If we don't have a length at this point, that is because we have an 7112 // array section with a single element. 7113 if (!OAE->getLength()) 7114 return ElemSize; 7115 7116 llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); 7117 LengthVal = 7118 CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); 7119 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7120 } 7121 return CGF.getTypeSize(ExprTy); 7122 } 7123 7124 /// Return the corresponding bits for a given map clause modifier. Add 7125 /// a flag marking the map as a pointer if requested. Add a flag marking the 7126 /// map as the first one of a series of maps that relate to the same map 7127 /// expression. 7128 OpenMPOffloadMappingFlags getMapTypeBits( 7129 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7130 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { 7131 OpenMPOffloadMappingFlags Bits = 7132 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7133 switch (MapType) { 7134 case OMPC_MAP_alloc: 7135 case OMPC_MAP_release: 7136 // alloc and release is the default behavior in the runtime library, i.e. 7137 // if we don't pass any bits alloc/release that is what the runtime is 7138 // going to do. Therefore, we don't need to signal anything for these two 7139 // type modifiers. 7140 break; 7141 case OMPC_MAP_to: 7142 Bits |= OMP_MAP_TO; 7143 break; 7144 case OMPC_MAP_from: 7145 Bits |= OMP_MAP_FROM; 7146 break; 7147 case OMPC_MAP_tofrom: 7148 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7149 break; 7150 case OMPC_MAP_delete: 7151 Bits |= OMP_MAP_DELETE; 7152 break; 7153 case OMPC_MAP_unknown: 7154 llvm_unreachable("Unexpected map type!"); 7155 } 7156 if (AddPtrFlag) 7157 Bits |= OMP_MAP_PTR_AND_OBJ; 7158 if (AddIsTargetParamFlag) 7159 Bits |= OMP_MAP_TARGET_PARAM; 7160 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7161 != MapModifiers.end()) 7162 Bits |= OMP_MAP_ALWAYS; 7163 return Bits; 7164 } 7165 7166 /// Return true if the provided expression is a final array section. A 7167 /// final array section, is one whose length can't be proved to be one. 7168 bool isFinalArraySectionExpression(const Expr *E) const { 7169 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7170 7171 // It is not an array section and therefore not a unity-size one. 7172 if (!OASE) 7173 return false; 7174 7175 // An array section with no colon always refer to a single element. 7176 if (OASE->getColonLoc().isInvalid()) 7177 return false; 7178 7179 const Expr *Length = OASE->getLength(); 7180 7181 // If we don't have a length we have to check if the array has size 1 7182 // for this dimension. Also, we should always expect a length if the 7183 // base type is pointer. 7184 if (!Length) { 7185 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7186 OASE->getBase()->IgnoreParenImpCasts()) 7187 .getCanonicalType(); 7188 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7189 return ATy->getSize().getSExtValue() != 1; 7190 // If we don't have a constant dimension length, we have to consider 7191 // the current section as having any size, so it is not necessarily 7192 // unitary. If it happen to be unity size, that's user fault. 7193 return true; 7194 } 7195 7196 // Check if the length evaluates to 1. 7197 Expr::EvalResult Result; 7198 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7199 return true; // Can have more that size 1. 7200 7201 llvm::APSInt ConstLength = Result.Val.getInt(); 7202 return ConstLength.getSExtValue() != 1; 7203 } 7204 7205 /// Generate the base pointers, section pointers, sizes and map type 7206 /// bits for the provided map type, map modifier, and expression components. 7207 /// \a IsFirstComponent should be set to true if the provided set of 7208 /// components is the first associated with a capture. 7209 void generateInfoForComponentList( 7210 OpenMPMapClauseKind MapType, 7211 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7212 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7213 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 7214 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 7215 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, 7216 bool IsImplicit, 7217 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7218 OverlappedElements = llvm::None) const { 7219 // The following summarizes what has to be generated for each map and the 7220 // types below. The generated information is expressed in this order: 7221 // base pointer, section pointer, size, flags 7222 // (to add to the ones that come from the map type and modifier). 7223 // 7224 // double d; 7225 // int i[100]; 7226 // float *p; 7227 // 7228 // struct S1 { 7229 // int i; 7230 // float f[50]; 7231 // } 7232 // struct S2 { 7233 // int i; 7234 // float f[50]; 7235 // S1 s; 7236 // double *p; 7237 // struct S2 *ps; 7238 // } 7239 // S2 s; 7240 // S2 *ps; 7241 // 7242 // map(d) 7243 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7244 // 7245 // map(i) 7246 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7247 // 7248 // map(i[1:23]) 7249 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7250 // 7251 // map(p) 7252 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7253 // 7254 // map(p[1:24]) 7255 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7256 // 7257 // map(s) 7258 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7259 // 7260 // map(s.i) 7261 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7262 // 7263 // map(s.s.f) 7264 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7265 // 7266 // map(s.p) 7267 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7268 // 7269 // map(to: s.p[:22]) 7270 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7271 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7272 // &(s.p), &(s.p[0]), 22*sizeof(double), 7273 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7274 // (*) alloc space for struct members, only this is a target parameter 7275 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7276 // optimizes this entry out, same in the examples below) 7277 // (***) map the pointee (map: to) 7278 // 7279 // map(s.ps) 7280 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7281 // 7282 // map(from: s.ps->s.i) 7283 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7284 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7285 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7286 // 7287 // map(to: s.ps->ps) 7288 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7289 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7290 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7291 // 7292 // map(s.ps->ps->ps) 7293 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7294 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7295 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7296 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7297 // 7298 // map(to: s.ps->ps->s.f[:22]) 7299 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7300 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7301 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7302 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7303 // 7304 // map(ps) 7305 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7306 // 7307 // map(ps->i) 7308 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7309 // 7310 // map(ps->s.f) 7311 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7312 // 7313 // map(from: ps->p) 7314 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7315 // 7316 // map(to: ps->p[:22]) 7317 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7318 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7319 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7320 // 7321 // map(ps->ps) 7322 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7323 // 7324 // map(from: ps->ps->s.i) 7325 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7326 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7327 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7328 // 7329 // map(from: ps->ps->ps) 7330 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7331 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7332 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7333 // 7334 // map(ps->ps->ps->ps) 7335 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7336 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7337 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7338 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7339 // 7340 // map(to: ps->ps->ps->s.f[:22]) 7341 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7342 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7343 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7344 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7345 // 7346 // map(to: s.f[:22]) map(from: s.p[:33]) 7347 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7348 // sizeof(double*) (**), TARGET_PARAM 7349 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7350 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7351 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7352 // (*) allocate contiguous space needed to fit all mapped members even if 7353 // we allocate space for members not mapped (in this example, 7354 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7355 // them as well because they fall between &s.f[0] and &s.p) 7356 // 7357 // map(from: s.f[:22]) map(to: ps->p[:33]) 7358 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7359 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7360 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7361 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7362 // (*) the struct this entry pertains to is the 2nd element in the list of 7363 // arguments, hence MEMBER_OF(2) 7364 // 7365 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7366 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7367 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7368 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7369 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7370 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7371 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7372 // (*) the struct this entry pertains to is the 4th element in the list 7373 // of arguments, hence MEMBER_OF(4) 7374 7375 // Track if the map information being generated is the first for a capture. 7376 bool IsCaptureFirstInfo = IsFirstComponentList; 7377 bool IsLink = false; // Is this variable a "declare target link"? 7378 7379 // Scan the components from the base to the complete expression. 7380 auto CI = Components.rbegin(); 7381 auto CE = Components.rend(); 7382 auto I = CI; 7383 7384 // Track if the map information being generated is the first for a list of 7385 // components. 7386 bool IsExpressionFirstInfo = true; 7387 Address BP = Address::invalid(); 7388 const Expr *AssocExpr = I->getAssociatedExpression(); 7389 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7390 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7391 7392 if (isa<MemberExpr>(AssocExpr)) { 7393 // The base is the 'this' pointer. The content of the pointer is going 7394 // to be the base of the field being mapped. 7395 BP = CGF.LoadCXXThisAddress(); 7396 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7397 (OASE && 7398 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7399 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); 7400 } else { 7401 // The base is the reference to the variable. 7402 // BP = &Var. 7403 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); 7404 if (const auto *VD = 7405 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7406 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7407 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) 7408 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) { 7409 IsLink = true; 7410 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); 7411 } 7412 } 7413 7414 // If the variable is a pointer and is being dereferenced (i.e. is not 7415 // the last component), the base has to be the pointer itself, not its 7416 // reference. References are ignored for mapping purposes. 7417 QualType Ty = 7418 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7419 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7420 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7421 7422 // We do not need to generate individual map information for the 7423 // pointer, it can be associated with the combined storage. 7424 ++I; 7425 } 7426 } 7427 7428 // Track whether a component of the list should be marked as MEMBER_OF some 7429 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7430 // in a component list should be marked as MEMBER_OF, all subsequent entries 7431 // do not belong to the base struct. E.g. 7432 // struct S2 s; 7433 // s.ps->ps->ps->f[:] 7434 // (1) (2) (3) (4) 7435 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7436 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7437 // is the pointee of ps(2) which is not member of struct s, so it should not 7438 // be marked as such (it is still PTR_AND_OBJ). 7439 // The variable is initialized to false so that PTR_AND_OBJ entries which 7440 // are not struct members are not considered (e.g. array of pointers to 7441 // data). 7442 bool ShouldBeMemberOf = false; 7443 7444 // Variable keeping track of whether or not we have encountered a component 7445 // in the component list which is a member expression. Useful when we have a 7446 // pointer or a final array section, in which case it is the previous 7447 // component in the list which tells us whether we have a member expression. 7448 // E.g. X.f[:] 7449 // While processing the final array section "[:]" it is "f" which tells us 7450 // whether we are dealing with a member of a declared struct. 7451 const MemberExpr *EncounteredME = nullptr; 7452 7453 for (; I != CE; ++I) { 7454 // If the current component is member of a struct (parent struct) mark it. 7455 if (!EncounteredME) { 7456 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7457 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7458 // as MEMBER_OF the parent struct. 7459 if (EncounteredME) 7460 ShouldBeMemberOf = true; 7461 } 7462 7463 auto Next = std::next(I); 7464 7465 // We need to generate the addresses and sizes if this is the last 7466 // component, if the component is a pointer or if it is an array section 7467 // whose length can't be proved to be one. If this is a pointer, it 7468 // becomes the base address for the following components. 7469 7470 // A final array section, is one whose length can't be proved to be one. 7471 bool IsFinalArraySection = 7472 isFinalArraySectionExpression(I->getAssociatedExpression()); 7473 7474 // Get information on whether the element is a pointer. Have to do a 7475 // special treatment for array sections given that they are built-in 7476 // types. 7477 const auto *OASE = 7478 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7479 bool IsPointer = 7480 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7481 .getCanonicalType() 7482 ->isAnyPointerType()) || 7483 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7484 7485 if (Next == CE || IsPointer || IsFinalArraySection) { 7486 // If this is not the last component, we expect the pointer to be 7487 // associated with an array expression or member expression. 7488 assert((Next == CE || 7489 isa<MemberExpr>(Next->getAssociatedExpression()) || 7490 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7491 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && 7492 "Unexpected expression"); 7493 7494 Address LB = 7495 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress(); 7496 7497 // If this component is a pointer inside the base struct then we don't 7498 // need to create any entry for it - it will be combined with the object 7499 // it is pointing to into a single PTR_AND_OBJ entry. 7500 bool IsMemberPointer = 7501 IsPointer && EncounteredME && 7502 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7503 EncounteredME); 7504 if (!OverlappedElements.empty()) { 7505 // Handle base element with the info for overlapped elements. 7506 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7507 assert(Next == CE && 7508 "Expected last element for the overlapped elements."); 7509 assert(!IsPointer && 7510 "Unexpected base element with the pointer type."); 7511 // Mark the whole struct as the struct that requires allocation on the 7512 // device. 7513 PartialStruct.LowestElem = {0, LB}; 7514 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7515 I->getAssociatedExpression()->getType()); 7516 Address HB = CGF.Builder.CreateConstGEP( 7517 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7518 CGF.VoidPtrTy), 7519 TypeSize.getQuantity() - 1); 7520 PartialStruct.HighestElem = { 7521 std::numeric_limits<decltype( 7522 PartialStruct.HighestElem.first)>::max(), 7523 HB}; 7524 PartialStruct.Base = BP; 7525 // Emit data for non-overlapped data. 7526 OpenMPOffloadMappingFlags Flags = 7527 OMP_MAP_MEMBER_OF | 7528 getMapTypeBits(MapType, MapModifiers, IsImplicit, 7529 /*AddPtrFlag=*/false, 7530 /*AddIsTargetParamFlag=*/false); 7531 LB = BP; 7532 llvm::Value *Size = nullptr; 7533 // Do bitcopy of all non-overlapped structure elements. 7534 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7535 Component : OverlappedElements) { 7536 Address ComponentLB = Address::invalid(); 7537 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7538 Component) { 7539 if (MC.getAssociatedDeclaration()) { 7540 ComponentLB = 7541 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7542 .getAddress(); 7543 Size = CGF.Builder.CreatePtrDiff( 7544 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7545 CGF.EmitCastToVoidPtr(LB.getPointer())); 7546 break; 7547 } 7548 } 7549 BasePointers.push_back(BP.getPointer()); 7550 Pointers.push_back(LB.getPointer()); 7551 Sizes.push_back(Size); 7552 Types.push_back(Flags); 7553 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7554 } 7555 BasePointers.push_back(BP.getPointer()); 7556 Pointers.push_back(LB.getPointer()); 7557 Size = CGF.Builder.CreatePtrDiff( 7558 CGF.EmitCastToVoidPtr( 7559 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7560 CGF.EmitCastToVoidPtr(LB.getPointer())); 7561 Sizes.push_back(Size); 7562 Types.push_back(Flags); 7563 break; 7564 } 7565 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7566 if (!IsMemberPointer) { 7567 BasePointers.push_back(BP.getPointer()); 7568 Pointers.push_back(LB.getPointer()); 7569 Sizes.push_back(Size); 7570 7571 // We need to add a pointer flag for each map that comes from the 7572 // same expression except for the first one. We also need to signal 7573 // this map is the first one that relates with the current capture 7574 // (there is a set of entries for each capture). 7575 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7576 MapType, MapModifiers, IsImplicit, 7577 !IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink); 7578 7579 if (!IsExpressionFirstInfo) { 7580 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7581 // then we reset the TO/FROM/ALWAYS/DELETE flags. 7582 if (IsPointer) 7583 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7584 OMP_MAP_DELETE); 7585 7586 if (ShouldBeMemberOf) { 7587 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7588 // should be later updated with the correct value of MEMBER_OF. 7589 Flags |= OMP_MAP_MEMBER_OF; 7590 // From now on, all subsequent PTR_AND_OBJ entries should not be 7591 // marked as MEMBER_OF. 7592 ShouldBeMemberOf = false; 7593 } 7594 } 7595 7596 Types.push_back(Flags); 7597 } 7598 7599 // If we have encountered a member expression so far, keep track of the 7600 // mapped member. If the parent is "*this", then the value declaration 7601 // is nullptr. 7602 if (EncounteredME) { 7603 const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl()); 7604 unsigned FieldIndex = FD->getFieldIndex(); 7605 7606 // Update info about the lowest and highest elements for this struct 7607 if (!PartialStruct.Base.isValid()) { 7608 PartialStruct.LowestElem = {FieldIndex, LB}; 7609 PartialStruct.HighestElem = {FieldIndex, LB}; 7610 PartialStruct.Base = BP; 7611 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7612 PartialStruct.LowestElem = {FieldIndex, LB}; 7613 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7614 PartialStruct.HighestElem = {FieldIndex, LB}; 7615 } 7616 } 7617 7618 // If we have a final array section, we are done with this expression. 7619 if (IsFinalArraySection) 7620 break; 7621 7622 // The pointer becomes the base for the next element. 7623 if (Next != CE) 7624 BP = LB; 7625 7626 IsExpressionFirstInfo = false; 7627 IsCaptureFirstInfo = false; 7628 } 7629 } 7630 } 7631 7632 /// Return the adjusted map modifiers if the declaration a capture refers to 7633 /// appears in a first-private clause. This is expected to be used only with 7634 /// directives that start with 'target'. 7635 MappableExprsHandler::OpenMPOffloadMappingFlags 7636 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 7637 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 7638 7639 // A first private variable captured by reference will use only the 7640 // 'private ptr' and 'map to' flag. Return the right flags if the captured 7641 // declaration is known as first-private in this handler. 7642 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 7643 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 7644 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 7645 return MappableExprsHandler::OMP_MAP_ALWAYS | 7646 MappableExprsHandler::OMP_MAP_TO; 7647 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 7648 return MappableExprsHandler::OMP_MAP_TO | 7649 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 7650 return MappableExprsHandler::OMP_MAP_PRIVATE | 7651 MappableExprsHandler::OMP_MAP_TO; 7652 } 7653 return MappableExprsHandler::OMP_MAP_TO | 7654 MappableExprsHandler::OMP_MAP_FROM; 7655 } 7656 7657 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 7658 // Member of is given by the 16 MSB of the flag, so rotate by 48 bits. 7659 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 7660 << 48); 7661 } 7662 7663 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 7664 OpenMPOffloadMappingFlags MemberOfFlag) { 7665 // If the entry is PTR_AND_OBJ but has not been marked with the special 7666 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 7667 // marked as MEMBER_OF. 7668 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 7669 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 7670 return; 7671 7672 // Reset the placeholder value to prepare the flag for the assignment of the 7673 // proper MEMBER_OF value. 7674 Flags &= ~OMP_MAP_MEMBER_OF; 7675 Flags |= MemberOfFlag; 7676 } 7677 7678 void getPlainLayout(const CXXRecordDecl *RD, 7679 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 7680 bool AsBase) const { 7681 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 7682 7683 llvm::StructType *St = 7684 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 7685 7686 unsigned NumElements = St->getNumElements(); 7687 llvm::SmallVector< 7688 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 7689 RecordLayout(NumElements); 7690 7691 // Fill bases. 7692 for (const auto &I : RD->bases()) { 7693 if (I.isVirtual()) 7694 continue; 7695 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7696 // Ignore empty bases. 7697 if (Base->isEmpty() || CGF.getContext() 7698 .getASTRecordLayout(Base) 7699 .getNonVirtualSize() 7700 .isZero()) 7701 continue; 7702 7703 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 7704 RecordLayout[FieldIndex] = Base; 7705 } 7706 // Fill in virtual bases. 7707 for (const auto &I : RD->vbases()) { 7708 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7709 // Ignore empty bases. 7710 if (Base->isEmpty()) 7711 continue; 7712 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 7713 if (RecordLayout[FieldIndex]) 7714 continue; 7715 RecordLayout[FieldIndex] = Base; 7716 } 7717 // Fill in all the fields. 7718 assert(!RD->isUnion() && "Unexpected union."); 7719 for (const auto *Field : RD->fields()) { 7720 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 7721 // will fill in later.) 7722 if (!Field->isBitField()) { 7723 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 7724 RecordLayout[FieldIndex] = Field; 7725 } 7726 } 7727 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 7728 &Data : RecordLayout) { 7729 if (Data.isNull()) 7730 continue; 7731 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 7732 getPlainLayout(Base, Layout, /*AsBase=*/true); 7733 else 7734 Layout.push_back(Data.get<const FieldDecl *>()); 7735 } 7736 } 7737 7738 public: 7739 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 7740 : CurDir(Dir), CGF(CGF) { 7741 // Extract firstprivate clause information. 7742 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 7743 for (const auto *D : C->varlists()) 7744 FirstPrivateDecls.insert( 7745 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); 7746 // Extract device pointer clause information. 7747 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 7748 for (auto L : C->component_lists()) 7749 DevPointersMap[L.first].push_back(L.second); 7750 } 7751 7752 /// Generate code for the combined entry if we have a partially mapped struct 7753 /// and take care of the mapping flags of the arguments corresponding to 7754 /// individual struct members. 7755 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 7756 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7757 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 7758 const StructRangeInfoTy &PartialStruct) const { 7759 // Base is the base of the struct 7760 BasePointers.push_back(PartialStruct.Base.getPointer()); 7761 // Pointer is the address of the lowest element 7762 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 7763 Pointers.push_back(LB); 7764 // Size is (addr of {highest+1} element) - (addr of lowest element) 7765 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 7766 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 7767 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 7768 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 7769 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 7770 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.SizeTy, 7771 /*isSinged=*/false); 7772 Sizes.push_back(Size); 7773 // Map type is always TARGET_PARAM 7774 Types.push_back(OMP_MAP_TARGET_PARAM); 7775 // Remove TARGET_PARAM flag from the first element 7776 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 7777 7778 // All other current entries will be MEMBER_OF the combined entry 7779 // (except for PTR_AND_OBJ entries which do not have a placeholder value 7780 // 0xFFFF in the MEMBER_OF field). 7781 OpenMPOffloadMappingFlags MemberOfFlag = 7782 getMemberOfFlag(BasePointers.size() - 1); 7783 for (auto &M : CurTypes) 7784 setCorrectMemberOfFlag(M, MemberOfFlag); 7785 } 7786 7787 /// Generate all the base pointers, section pointers, sizes and map 7788 /// types for the extracted mappable expressions. Also, for each item that 7789 /// relates with a device pointer, a pair of the relevant declaration and 7790 /// index where it occurs is appended to the device pointers info array. 7791 void generateAllInfo(MapBaseValuesArrayTy &BasePointers, 7792 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7793 MapFlagsArrayTy &Types) const { 7794 // We have to process the component lists that relate with the same 7795 // declaration in a single chunk so that we can generate the map flags 7796 // correctly. Therefore, we organize all lists in a map. 7797 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 7798 7799 // Helper function to fill the information map for the different supported 7800 // clauses. 7801 auto &&InfoGen = [&Info]( 7802 const ValueDecl *D, 7803 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 7804 OpenMPMapClauseKind MapType, 7805 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7806 bool ReturnDevicePointer, bool IsImplicit) { 7807 const ValueDecl *VD = 7808 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 7809 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 7810 IsImplicit); 7811 }; 7812 7813 // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 7814 for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) 7815 for (const auto &L : C->component_lists()) { 7816 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), 7817 /*ReturnDevicePointer=*/false, C->isImplicit()); 7818 } 7819 for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>()) 7820 for (const auto &L : C->component_lists()) { 7821 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, 7822 /*ReturnDevicePointer=*/false, C->isImplicit()); 7823 } 7824 for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>()) 7825 for (const auto &L : C->component_lists()) { 7826 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, 7827 /*ReturnDevicePointer=*/false, C->isImplicit()); 7828 } 7829 7830 // Look at the use_device_ptr clause information and mark the existing map 7831 // entries as such. If there is no map information for an entry in the 7832 // use_device_ptr list, we create one with map type 'alloc' and zero size 7833 // section. It is the user fault if that was not mapped before. If there is 7834 // no map information and the pointer is a struct member, then we defer the 7835 // emission of that entry until the whole struct has been processed. 7836 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> 7837 DeferredInfo; 7838 7839 // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 7840 for (const auto *C : 7841 this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) { 7842 for (const auto &L : C->component_lists()) { 7843 assert(!L.second.empty() && "Not expecting empty list of components!"); 7844 const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); 7845 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 7846 const Expr *IE = L.second.back().getAssociatedExpression(); 7847 // If the first component is a member expression, we have to look into 7848 // 'this', which maps to null in the map of map information. Otherwise 7849 // look directly for the information. 7850 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 7851 7852 // We potentially have map information for this declaration already. 7853 // Look for the first set of components that refer to it. 7854 if (It != Info.end()) { 7855 auto CI = std::find_if( 7856 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { 7857 return MI.Components.back().getAssociatedDeclaration() == VD; 7858 }); 7859 // If we found a map entry, signal that the pointer has to be returned 7860 // and move on to the next declaration. 7861 if (CI != It->second.end()) { 7862 CI->ReturnDevicePointer = true; 7863 continue; 7864 } 7865 } 7866 7867 // We didn't find any match in our map information - generate a zero 7868 // size array section - if the pointer is a struct member we defer this 7869 // action until the whole struct has been processed. 7870 // FIXME: MSVC 2013 seems to require this-> to find member CGF. 7871 if (isa<MemberExpr>(IE)) { 7872 // Insert the pointer into Info to be processed by 7873 // generateInfoForComponentList. Because it is a member pointer 7874 // without a pointee, no entry will be generated for it, therefore 7875 // we need to generate one after the whole struct has been processed. 7876 // Nonetheless, generateInfoForComponentList must be called to take 7877 // the pointer into account for the calculation of the range of the 7878 // partial struct. 7879 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, 7880 /*ReturnDevicePointer=*/false, C->isImplicit()); 7881 DeferredInfo[nullptr].emplace_back(IE, VD); 7882 } else { 7883 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( 7884 this->CGF.EmitLValue(IE), IE->getExprLoc()); 7885 BasePointers.emplace_back(Ptr, VD); 7886 Pointers.push_back(Ptr); 7887 Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy)); 7888 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); 7889 } 7890 } 7891 } 7892 7893 for (const auto &M : Info) { 7894 // We need to know when we generate information for the first component 7895 // associated with a capture, because the mapping flags depend on it. 7896 bool IsFirstComponentList = true; 7897 7898 // Temporary versions of arrays 7899 MapBaseValuesArrayTy CurBasePointers; 7900 MapValuesArrayTy CurPointers; 7901 MapValuesArrayTy CurSizes; 7902 MapFlagsArrayTy CurTypes; 7903 StructRangeInfoTy PartialStruct; 7904 7905 for (const MapInfo &L : M.second) { 7906 assert(!L.Components.empty() && 7907 "Not expecting declaration with no component lists."); 7908 7909 // Remember the current base pointer index. 7910 unsigned CurrentBasePointersIdx = CurBasePointers.size(); 7911 // FIXME: MSVC 2013 seems to require this-> to find the member method. 7912 this->generateInfoForComponentList( 7913 L.MapType, L.MapModifiers, L.Components, CurBasePointers, 7914 CurPointers, CurSizes, CurTypes, PartialStruct, 7915 IsFirstComponentList, L.IsImplicit); 7916 7917 // If this entry relates with a device pointer, set the relevant 7918 // declaration and add the 'return pointer' flag. 7919 if (L.ReturnDevicePointer) { 7920 assert(CurBasePointers.size() > CurrentBasePointersIdx && 7921 "Unexpected number of mapped base pointers."); 7922 7923 const ValueDecl *RelevantVD = 7924 L.Components.back().getAssociatedDeclaration(); 7925 assert(RelevantVD && 7926 "No relevant declaration related with device pointer??"); 7927 7928 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); 7929 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 7930 } 7931 IsFirstComponentList = false; 7932 } 7933 7934 // Append any pending zero-length pointers which are struct members and 7935 // used with use_device_ptr. 7936 auto CI = DeferredInfo.find(M.first); 7937 if (CI != DeferredInfo.end()) { 7938 for (const DeferredDevicePtrEntryTy &L : CI->second) { 7939 llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(); 7940 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( 7941 this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); 7942 CurBasePointers.emplace_back(BasePtr, L.VD); 7943 CurPointers.push_back(Ptr); 7944 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy)); 7945 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder 7946 // value MEMBER_OF=FFFF so that the entry is later updated with the 7947 // correct value of MEMBER_OF. 7948 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 7949 OMP_MAP_MEMBER_OF); 7950 } 7951 } 7952 7953 // If there is an entry in PartialStruct it means we have a struct with 7954 // individual members mapped. Emit an extra combined entry. 7955 if (PartialStruct.Base.isValid()) 7956 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 7957 PartialStruct); 7958 7959 // We need to append the results of this capture to what we already have. 7960 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 7961 Pointers.append(CurPointers.begin(), CurPointers.end()); 7962 Sizes.append(CurSizes.begin(), CurSizes.end()); 7963 Types.append(CurTypes.begin(), CurTypes.end()); 7964 } 7965 } 7966 7967 /// Emit capture info for lambdas for variables captured by reference. 7968 void generateInfoForLambdaCaptures( 7969 const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, 7970 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7971 MapFlagsArrayTy &Types, 7972 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 7973 const auto *RD = VD->getType() 7974 .getCanonicalType() 7975 .getNonReferenceType() 7976 ->getAsCXXRecordDecl(); 7977 if (!RD || !RD->isLambda()) 7978 return; 7979 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); 7980 LValue VDLVal = CGF.MakeAddrLValue( 7981 VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); 7982 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 7983 FieldDecl *ThisCapture = nullptr; 7984 RD->getCaptureFields(Captures, ThisCapture); 7985 if (ThisCapture) { 7986 LValue ThisLVal = 7987 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 7988 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 7989 LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer()); 7990 BasePointers.push_back(ThisLVal.getPointer()); 7991 Pointers.push_back(ThisLValVal.getPointer()); 7992 Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); 7993 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 7994 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 7995 } 7996 for (const LambdaCapture &LC : RD->captures()) { 7997 if (LC.getCaptureKind() != LCK_ByRef) 7998 continue; 7999 const VarDecl *VD = LC.getCapturedVar(); 8000 auto It = Captures.find(VD); 8001 assert(It != Captures.end() && "Found lambda capture without field."); 8002 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8003 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8004 LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer()); 8005 BasePointers.push_back(VarLVal.getPointer()); 8006 Pointers.push_back(VarLValVal.getPointer()); 8007 Sizes.push_back(CGF.getTypeSize( 8008 VD->getType().getCanonicalType().getNonReferenceType())); 8009 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8010 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8011 } 8012 } 8013 8014 /// Set correct indices for lambdas captures. 8015 void adjustMemberOfForLambdaCaptures( 8016 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 8017 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 8018 MapFlagsArrayTy &Types) const { 8019 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 8020 // Set correct member_of idx for all implicit lambda captures. 8021 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8022 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) 8023 continue; 8024 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); 8025 assert(BasePtr && "Unable to find base lambda address."); 8026 int TgtIdx = -1; 8027 for (unsigned J = I; J > 0; --J) { 8028 unsigned Idx = J - 1; 8029 if (Pointers[Idx] != BasePtr) 8030 continue; 8031 TgtIdx = Idx; 8032 break; 8033 } 8034 assert(TgtIdx != -1 && "Unable to find parent lambda."); 8035 // All other current entries will be MEMBER_OF the combined entry 8036 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8037 // 0xFFFF in the MEMBER_OF field). 8038 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); 8039 setCorrectMemberOfFlag(Types[I], MemberOfFlag); 8040 } 8041 } 8042 8043 /// Generate the base pointers, section pointers, sizes and map types 8044 /// associated to a given capture. 8045 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 8046 llvm::Value *Arg, 8047 MapBaseValuesArrayTy &BasePointers, 8048 MapValuesArrayTy &Pointers, 8049 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 8050 StructRangeInfoTy &PartialStruct) const { 8051 assert(!Cap->capturesVariableArrayType() && 8052 "Not expecting to generate map info for a variable array type!"); 8053 8054 // We need to know when we generating information for the first component 8055 const ValueDecl *VD = Cap->capturesThis() 8056 ? nullptr 8057 : Cap->getCapturedVar()->getCanonicalDecl(); 8058 8059 // If this declaration appears in a is_device_ptr clause we just have to 8060 // pass the pointer by value. If it is a reference to a declaration, we just 8061 // pass its value. 8062 if (DevPointersMap.count(VD)) { 8063 BasePointers.emplace_back(Arg, VD); 8064 Pointers.push_back(Arg); 8065 Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); 8066 Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM); 8067 return; 8068 } 8069 8070 using MapData = 8071 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, 8072 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>; 8073 SmallVector<MapData, 4> DeclComponentLists; 8074 // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 8075 for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) { 8076 for (const auto &L : C->decl_component_lists(VD)) { 8077 assert(L.first == VD && 8078 "We got information for the wrong declaration??"); 8079 assert(!L.second.empty() && 8080 "Not expecting declaration with no component lists."); 8081 DeclComponentLists.emplace_back(L.second, C->getMapType(), 8082 C->getMapTypeModifiers(), 8083 C->isImplicit()); 8084 } 8085 } 8086 8087 // Find overlapping elements (including the offset from the base element). 8088 llvm::SmallDenseMap< 8089 const MapData *, 8090 llvm::SmallVector< 8091 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 8092 4> 8093 OverlappedData; 8094 size_t Count = 0; 8095 for (const MapData &L : DeclComponentLists) { 8096 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8097 OpenMPMapClauseKind MapType; 8098 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8099 bool IsImplicit; 8100 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8101 ++Count; 8102 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { 8103 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 8104 std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1; 8105 auto CI = Components.rbegin(); 8106 auto CE = Components.rend(); 8107 auto SI = Components1.rbegin(); 8108 auto SE = Components1.rend(); 8109 for (; CI != CE && SI != SE; ++CI, ++SI) { 8110 if (CI->getAssociatedExpression()->getStmtClass() != 8111 SI->getAssociatedExpression()->getStmtClass()) 8112 break; 8113 // Are we dealing with different variables/fields? 8114 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 8115 break; 8116 } 8117 // Found overlapping if, at least for one component, reached the head of 8118 // the components list. 8119 if (CI == CE || SI == SE) { 8120 assert((CI != CE || SI != SE) && 8121 "Unexpected full match of the mapping components."); 8122 const MapData &BaseData = CI == CE ? L : L1; 8123 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = 8124 SI == SE ? Components : Components1; 8125 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); 8126 OverlappedElements.getSecond().push_back(SubData); 8127 } 8128 } 8129 } 8130 // Sort the overlapped elements for each item. 8131 llvm::SmallVector<const FieldDecl *, 4> Layout; 8132 if (!OverlappedData.empty()) { 8133 if (const auto *CRD = 8134 VD->getType().getCanonicalType()->getAsCXXRecordDecl()) 8135 getPlainLayout(CRD, Layout, /*AsBase=*/false); 8136 else { 8137 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl(); 8138 Layout.append(RD->field_begin(), RD->field_end()); 8139 } 8140 } 8141 for (auto &Pair : OverlappedData) { 8142 llvm::sort( 8143 Pair.getSecond(), 8144 [&Layout]( 8145 OMPClauseMappableExprCommon::MappableExprComponentListRef First, 8146 OMPClauseMappableExprCommon::MappableExprComponentListRef 8147 Second) { 8148 auto CI = First.rbegin(); 8149 auto CE = First.rend(); 8150 auto SI = Second.rbegin(); 8151 auto SE = Second.rend(); 8152 for (; CI != CE && SI != SE; ++CI, ++SI) { 8153 if (CI->getAssociatedExpression()->getStmtClass() != 8154 SI->getAssociatedExpression()->getStmtClass()) 8155 break; 8156 // Are we dealing with different variables/fields? 8157 if (CI->getAssociatedDeclaration() != 8158 SI->getAssociatedDeclaration()) 8159 break; 8160 } 8161 8162 // Lists contain the same elements. 8163 if (CI == CE && SI == SE) 8164 return false; 8165 8166 // List with less elements is less than list with more elements. 8167 if (CI == CE || SI == SE) 8168 return CI == CE; 8169 8170 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); 8171 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); 8172 if (FD1->getParent() == FD2->getParent()) 8173 return FD1->getFieldIndex() < FD2->getFieldIndex(); 8174 const auto It = 8175 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { 8176 return FD == FD1 || FD == FD2; 8177 }); 8178 return *It == FD1; 8179 }); 8180 } 8181 8182 // Associated with a capture, because the mapping flags depend on it. 8183 // Go through all of the elements with the overlapped elements. 8184 for (const auto &Pair : OverlappedData) { 8185 const MapData &L = *Pair.getFirst(); 8186 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8187 OpenMPMapClauseKind MapType; 8188 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8189 bool IsImplicit; 8190 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8191 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 8192 OverlappedComponents = Pair.getSecond(); 8193 bool IsFirstComponentList = true; 8194 generateInfoForComponentList(MapType, MapModifiers, Components, 8195 BasePointers, Pointers, Sizes, Types, 8196 PartialStruct, IsFirstComponentList, 8197 IsImplicit, OverlappedComponents); 8198 } 8199 // Go through other elements without overlapped elements. 8200 bool IsFirstComponentList = OverlappedData.empty(); 8201 for (const MapData &L : DeclComponentLists) { 8202 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8203 OpenMPMapClauseKind MapType; 8204 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8205 bool IsImplicit; 8206 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8207 auto It = OverlappedData.find(&L); 8208 if (It == OverlappedData.end()) 8209 generateInfoForComponentList(MapType, MapModifiers, Components, 8210 BasePointers, Pointers, Sizes, Types, 8211 PartialStruct, IsFirstComponentList, 8212 IsImplicit); 8213 IsFirstComponentList = false; 8214 } 8215 } 8216 8217 /// Generate the base pointers, section pointers, sizes and map types 8218 /// associated with the declare target link variables. 8219 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers, 8220 MapValuesArrayTy &Pointers, 8221 MapValuesArrayTy &Sizes, 8222 MapFlagsArrayTy &Types) const { 8223 // Map other list items in the map clause which are not captured variables 8224 // but "declare target link" global variables., 8225 for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) { 8226 for (const auto &L : C->component_lists()) { 8227 if (!L.first) 8228 continue; 8229 const auto *VD = dyn_cast<VarDecl>(L.first); 8230 if (!VD) 8231 continue; 8232 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 8233 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 8234 if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) 8235 continue; 8236 StructRangeInfoTy PartialStruct; 8237 generateInfoForComponentList( 8238 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers, 8239 Pointers, Sizes, Types, PartialStruct, 8240 /*IsFirstComponentList=*/true, C->isImplicit()); 8241 assert(!PartialStruct.Base.isValid() && 8242 "No partial structs for declare target link expected."); 8243 } 8244 } 8245 } 8246 8247 /// Generate the default map information for a given capture \a CI, 8248 /// record field declaration \a RI and captured value \a CV. 8249 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 8250 const FieldDecl &RI, llvm::Value *CV, 8251 MapBaseValuesArrayTy &CurBasePointers, 8252 MapValuesArrayTy &CurPointers, 8253 MapValuesArrayTy &CurSizes, 8254 MapFlagsArrayTy &CurMapTypes) const { 8255 // Do the default mapping. 8256 if (CI.capturesThis()) { 8257 CurBasePointers.push_back(CV); 8258 CurPointers.push_back(CV); 8259 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 8260 CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType())); 8261 // Default map type. 8262 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 8263 } else if (CI.capturesVariableByCopy()) { 8264 CurBasePointers.push_back(CV); 8265 CurPointers.push_back(CV); 8266 if (!RI.getType()->isAnyPointerType()) { 8267 // We have to signal to the runtime captures passed by value that are 8268 // not pointers. 8269 CurMapTypes.push_back(OMP_MAP_LITERAL); 8270 CurSizes.push_back(CGF.getTypeSize(RI.getType())); 8271 } else { 8272 // Pointers are implicitly mapped with a zero size and no flags 8273 // (other than first map that is added for all implicit maps). 8274 CurMapTypes.push_back(OMP_MAP_NONE); 8275 CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy)); 8276 } 8277 } else { 8278 assert(CI.capturesVariable() && "Expected captured reference."); 8279 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 8280 QualType ElementType = PtrTy->getPointeeType(); 8281 CurSizes.push_back(CGF.getTypeSize(ElementType)); 8282 // The default map type for a scalar/complex type is 'to' because by 8283 // default the value doesn't have to be retrieved. For an aggregate 8284 // type, the default is 'tofrom'. 8285 CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); 8286 const VarDecl *VD = CI.getCapturedVar(); 8287 if (FirstPrivateDecls.count(VD) && 8288 VD->getType().isConstant(CGF.getContext())) { 8289 llvm::Constant *Addr = 8290 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 8291 // Copy the value of the original variable to the new global copy. 8292 CGF.Builder.CreateMemCpy( 8293 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(), 8294 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 8295 CurSizes.back(), /*isVolatile=*/false); 8296 // Use new global variable as the base pointers. 8297 CurBasePointers.push_back(Addr); 8298 CurPointers.push_back(Addr); 8299 } else { 8300 CurBasePointers.push_back(CV); 8301 if (FirstPrivateDecls.count(VD) && ElementType->isAnyPointerType()) { 8302 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 8303 CV, ElementType, CGF.getContext().getDeclAlign(VD), 8304 AlignmentSource::Decl)); 8305 CurPointers.push_back(PtrAddr.getPointer()); 8306 } else { 8307 CurPointers.push_back(CV); 8308 } 8309 } 8310 } 8311 // Every default map produces a single argument which is a target parameter. 8312 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; 8313 8314 // Add flag stating this is an implicit map. 8315 CurMapTypes.back() |= OMP_MAP_IMPLICIT; 8316 } 8317 }; 8318 8319 enum OpenMPOffloadingReservedDeviceIDs { 8320 /// Device ID if the device was not defined, runtime should get it 8321 /// from environment variables in the spec. 8322 OMP_DEVICEID_UNDEF = -1, 8323 }; 8324 } // anonymous namespace 8325 8326 /// Emit the arrays used to pass the captures and map information to the 8327 /// offloading runtime library. If there is no map or capture information, 8328 /// return nullptr by reference. 8329 static void 8330 emitOffloadingArrays(CodeGenFunction &CGF, 8331 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 8332 MappableExprsHandler::MapValuesArrayTy &Pointers, 8333 MappableExprsHandler::MapValuesArrayTy &Sizes, 8334 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 8335 CGOpenMPRuntime::TargetDataInfo &Info) { 8336 CodeGenModule &CGM = CGF.CGM; 8337 ASTContext &Ctx = CGF.getContext(); 8338 8339 // Reset the array information. 8340 Info.clearArrayInfo(); 8341 Info.NumberOfPtrs = BasePointers.size(); 8342 8343 if (Info.NumberOfPtrs) { 8344 // Detect if we have any capture size requiring runtime evaluation of the 8345 // size so that a constant array could be eventually used. 8346 bool hasRuntimeEvaluationCaptureSize = false; 8347 for (llvm::Value *S : Sizes) 8348 if (!isa<llvm::Constant>(S)) { 8349 hasRuntimeEvaluationCaptureSize = true; 8350 break; 8351 } 8352 8353 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 8354 QualType PointerArrayType = 8355 Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, 8356 /*IndexTypeQuals=*/0); 8357 8358 Info.BasePointersArray = 8359 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 8360 Info.PointersArray = 8361 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 8362 8363 // If we don't have any VLA types or other types that require runtime 8364 // evaluation, we can use a constant array for the map sizes, otherwise we 8365 // need to fill up the arrays as we do for the pointers. 8366 if (hasRuntimeEvaluationCaptureSize) { 8367 QualType SizeArrayType = Ctx.getConstantArrayType( 8368 Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, 8369 /*IndexTypeQuals=*/0); 8370 Info.SizesArray = 8371 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 8372 } else { 8373 // We expect all the sizes to be constant, so we collect them to create 8374 // a constant array. 8375 SmallVector<llvm::Constant *, 16> ConstSizes; 8376 for (llvm::Value *S : Sizes) 8377 ConstSizes.push_back(cast<llvm::Constant>(S)); 8378 8379 auto *SizesArrayInit = llvm::ConstantArray::get( 8380 llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); 8381 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 8382 auto *SizesArrayGbl = new llvm::GlobalVariable( 8383 CGM.getModule(), SizesArrayInit->getType(), 8384 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8385 SizesArrayInit, Name); 8386 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8387 Info.SizesArray = SizesArrayGbl; 8388 } 8389 8390 // The map types are always constant so we don't need to generate code to 8391 // fill arrays. Instead, we create an array constant. 8392 SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0); 8393 llvm::copy(MapTypes, Mapping.begin()); 8394 llvm::Constant *MapTypesArrayInit = 8395 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 8396 std::string MaptypesName = 8397 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 8398 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 8399 CGM.getModule(), MapTypesArrayInit->getType(), 8400 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8401 MapTypesArrayInit, MaptypesName); 8402 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8403 Info.MapTypesArray = MapTypesArrayGbl; 8404 8405 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 8406 llvm::Value *BPVal = *BasePointers[I]; 8407 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 8408 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8409 Info.BasePointersArray, 0, I); 8410 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8411 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8412 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8413 CGF.Builder.CreateStore(BPVal, BPAddr); 8414 8415 if (Info.requiresDevicePointerInfo()) 8416 if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl()) 8417 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 8418 8419 llvm::Value *PVal = Pointers[I]; 8420 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 8421 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8422 Info.PointersArray, 0, I); 8423 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8424 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8425 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8426 CGF.Builder.CreateStore(PVal, PAddr); 8427 8428 if (hasRuntimeEvaluationCaptureSize) { 8429 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 8430 llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), 8431 Info.SizesArray, 8432 /*Idx0=*/0, 8433 /*Idx1=*/I); 8434 Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); 8435 CGF.Builder.CreateStore( 8436 CGF.Builder.CreateIntCast(Sizes[I], CGM.SizeTy, /*isSigned=*/true), 8437 SAddr); 8438 } 8439 } 8440 } 8441 } 8442 /// Emit the arguments to be passed to the runtime library based on the 8443 /// arrays of pointers, sizes and map types. 8444 static void emitOffloadingArraysArgument( 8445 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 8446 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 8447 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 8448 CodeGenModule &CGM = CGF.CGM; 8449 if (Info.NumberOfPtrs) { 8450 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8451 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8452 Info.BasePointersArray, 8453 /*Idx0=*/0, /*Idx1=*/0); 8454 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8455 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8456 Info.PointersArray, 8457 /*Idx0=*/0, 8458 /*Idx1=*/0); 8459 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8460 llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray, 8461 /*Idx0=*/0, /*Idx1=*/0); 8462 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8463 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8464 Info.MapTypesArray, 8465 /*Idx0=*/0, 8466 /*Idx1=*/0); 8467 } else { 8468 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8469 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8470 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); 8471 MapTypesArrayArg = 8472 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8473 } 8474 } 8475 8476 /// Check for inner distribute directive. 8477 static const OMPExecutableDirective * 8478 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 8479 const auto *CS = D.getInnermostCapturedStmt(); 8480 const auto *Body = 8481 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 8482 const Stmt *ChildStmt = 8483 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8484 8485 if (const auto *NestedDir = 8486 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8487 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 8488 switch (D.getDirectiveKind()) { 8489 case OMPD_target: 8490 if (isOpenMPDistributeDirective(DKind)) 8491 return NestedDir; 8492 if (DKind == OMPD_teams) { 8493 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 8494 /*IgnoreCaptured=*/true); 8495 if (!Body) 8496 return nullptr; 8497 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8498 if (const auto *NND = 8499 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8500 DKind = NND->getDirectiveKind(); 8501 if (isOpenMPDistributeDirective(DKind)) 8502 return NND; 8503 } 8504 } 8505 return nullptr; 8506 case OMPD_target_teams: 8507 if (isOpenMPDistributeDirective(DKind)) 8508 return NestedDir; 8509 return nullptr; 8510 case OMPD_target_parallel: 8511 case OMPD_target_simd: 8512 case OMPD_target_parallel_for: 8513 case OMPD_target_parallel_for_simd: 8514 return nullptr; 8515 case OMPD_target_teams_distribute: 8516 case OMPD_target_teams_distribute_simd: 8517 case OMPD_target_teams_distribute_parallel_for: 8518 case OMPD_target_teams_distribute_parallel_for_simd: 8519 case OMPD_parallel: 8520 case OMPD_for: 8521 case OMPD_parallel_for: 8522 case OMPD_parallel_sections: 8523 case OMPD_for_simd: 8524 case OMPD_parallel_for_simd: 8525 case OMPD_cancel: 8526 case OMPD_cancellation_point: 8527 case OMPD_ordered: 8528 case OMPD_threadprivate: 8529 case OMPD_allocate: 8530 case OMPD_task: 8531 case OMPD_simd: 8532 case OMPD_sections: 8533 case OMPD_section: 8534 case OMPD_single: 8535 case OMPD_master: 8536 case OMPD_critical: 8537 case OMPD_taskyield: 8538 case OMPD_barrier: 8539 case OMPD_taskwait: 8540 case OMPD_taskgroup: 8541 case OMPD_atomic: 8542 case OMPD_flush: 8543 case OMPD_teams: 8544 case OMPD_target_data: 8545 case OMPD_target_exit_data: 8546 case OMPD_target_enter_data: 8547 case OMPD_distribute: 8548 case OMPD_distribute_simd: 8549 case OMPD_distribute_parallel_for: 8550 case OMPD_distribute_parallel_for_simd: 8551 case OMPD_teams_distribute: 8552 case OMPD_teams_distribute_simd: 8553 case OMPD_teams_distribute_parallel_for: 8554 case OMPD_teams_distribute_parallel_for_simd: 8555 case OMPD_target_update: 8556 case OMPD_declare_simd: 8557 case OMPD_declare_target: 8558 case OMPD_end_declare_target: 8559 case OMPD_declare_reduction: 8560 case OMPD_declare_mapper: 8561 case OMPD_taskloop: 8562 case OMPD_taskloop_simd: 8563 case OMPD_requires: 8564 case OMPD_unknown: 8565 llvm_unreachable("Unexpected directive."); 8566 } 8567 } 8568 8569 return nullptr; 8570 } 8571 8572 void CGOpenMPRuntime::emitTargetNumIterationsCall( 8573 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device, 8574 const llvm::function_ref<llvm::Value *( 8575 CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) { 8576 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 8577 const OMPExecutableDirective *TD = &D; 8578 // Get nested teams distribute kind directive, if any. 8579 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 8580 TD = getNestedDistributeDirective(CGM.getContext(), D); 8581 if (!TD) 8582 return; 8583 const auto *LD = cast<OMPLoopDirective>(TD); 8584 auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF, 8585 PrePostActionTy &) { 8586 llvm::Value *NumIterations = SizeEmitter(CGF, *LD); 8587 8588 // Emit device ID if any. 8589 llvm::Value *DeviceID; 8590 if (Device) 8591 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 8592 CGF.Int64Ty, /*isSigned=*/true); 8593 else 8594 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 8595 8596 llvm::Value *Args[] = {DeviceID, NumIterations}; 8597 CGF.EmitRuntimeCall( 8598 createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); 8599 }; 8600 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 8601 } 8602 8603 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, 8604 const OMPExecutableDirective &D, 8605 llvm::Function *OutlinedFn, 8606 llvm::Value *OutlinedFnID, 8607 const Expr *IfCond, const Expr *Device) { 8608 if (!CGF.HaveInsertPoint()) 8609 return; 8610 8611 assert(OutlinedFn && "Invalid outlined function!"); 8612 8613 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 8614 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 8615 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 8616 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 8617 PrePostActionTy &) { 8618 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 8619 }; 8620 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 8621 8622 CodeGenFunction::OMPTargetDataInfo InputInfo; 8623 llvm::Value *MapTypesArray = nullptr; 8624 // Fill up the pointer arrays and transfer execution to the device. 8625 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 8626 &MapTypesArray, &CS, RequiresOuterTask, 8627 &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) { 8628 // On top of the arrays that were filled up, the target offloading call 8629 // takes as arguments the device id as well as the host pointer. The host 8630 // pointer is used by the runtime library to identify the current target 8631 // region, so it only has to be unique and not necessarily point to 8632 // anything. It could be the pointer to the outlined function that 8633 // implements the target region, but we aren't using that so that the 8634 // compiler doesn't need to keep that, and could therefore inline the host 8635 // function if proven worthwhile during optimization. 8636 8637 // From this point on, we need to have an ID of the target region defined. 8638 assert(OutlinedFnID && "Invalid outlined function ID!"); 8639 8640 // Emit device ID if any. 8641 llvm::Value *DeviceID; 8642 if (Device) { 8643 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 8644 CGF.Int64Ty, /*isSigned=*/true); 8645 } else { 8646 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 8647 } 8648 8649 // Emit the number of elements in the offloading arrays. 8650 llvm::Value *PointerNum = 8651 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 8652 8653 // Return value of the runtime offloading call. 8654 llvm::Value *Return; 8655 8656 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 8657 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 8658 8659 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 8660 // The target region is an outlined function launched by the runtime 8661 // via calls __tgt_target() or __tgt_target_teams(). 8662 // 8663 // __tgt_target() launches a target region with one team and one thread, 8664 // executing a serial region. This master thread may in turn launch 8665 // more threads within its team upon encountering a parallel region, 8666 // however, no additional teams can be launched on the device. 8667 // 8668 // __tgt_target_teams() launches a target region with one or more teams, 8669 // each with one or more threads. This call is required for target 8670 // constructs such as: 8671 // 'target teams' 8672 // 'target' / 'teams' 8673 // 'target teams distribute parallel for' 8674 // 'target parallel' 8675 // and so on. 8676 // 8677 // Note that on the host and CPU targets, the runtime implementation of 8678 // these calls simply call the outlined function without forking threads. 8679 // The outlined functions themselves have runtime calls to 8680 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 8681 // the compiler in emitTeamsCall() and emitParallelCall(). 8682 // 8683 // In contrast, on the NVPTX target, the implementation of 8684 // __tgt_target_teams() launches a GPU kernel with the requested number 8685 // of teams and threads so no additional calls to the runtime are required. 8686 if (NumTeams) { 8687 // If we have NumTeams defined this means that we have an enclosed teams 8688 // region. Therefore we also expect to have NumThreads defined. These two 8689 // values should be defined in the presence of a teams directive, 8690 // regardless of having any clauses associated. If the user is using teams 8691 // but no clauses, these two values will be the default that should be 8692 // passed to the runtime library - a 32-bit integer with the value zero. 8693 assert(NumThreads && "Thread limit expression should be available along " 8694 "with number of teams."); 8695 llvm::Value *OffloadingArgs[] = {DeviceID, 8696 OutlinedFnID, 8697 PointerNum, 8698 InputInfo.BasePointersArray.getPointer(), 8699 InputInfo.PointersArray.getPointer(), 8700 InputInfo.SizesArray.getPointer(), 8701 MapTypesArray, 8702 NumTeams, 8703 NumThreads}; 8704 Return = CGF.EmitRuntimeCall( 8705 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait 8706 : OMPRTL__tgt_target_teams), 8707 OffloadingArgs); 8708 } else { 8709 llvm::Value *OffloadingArgs[] = {DeviceID, 8710 OutlinedFnID, 8711 PointerNum, 8712 InputInfo.BasePointersArray.getPointer(), 8713 InputInfo.PointersArray.getPointer(), 8714 InputInfo.SizesArray.getPointer(), 8715 MapTypesArray}; 8716 Return = CGF.EmitRuntimeCall( 8717 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait 8718 : OMPRTL__tgt_target), 8719 OffloadingArgs); 8720 } 8721 8722 // Check the error code and execute the host version if required. 8723 llvm::BasicBlock *OffloadFailedBlock = 8724 CGF.createBasicBlock("omp_offload.failed"); 8725 llvm::BasicBlock *OffloadContBlock = 8726 CGF.createBasicBlock("omp_offload.cont"); 8727 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 8728 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 8729 8730 CGF.EmitBlock(OffloadFailedBlock); 8731 if (RequiresOuterTask) { 8732 CapturedVars.clear(); 8733 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 8734 } 8735 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 8736 CGF.EmitBranch(OffloadContBlock); 8737 8738 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 8739 }; 8740 8741 // Notify that the host version must be executed. 8742 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 8743 RequiresOuterTask](CodeGenFunction &CGF, 8744 PrePostActionTy &) { 8745 if (RequiresOuterTask) { 8746 CapturedVars.clear(); 8747 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 8748 } 8749 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 8750 }; 8751 8752 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 8753 &CapturedVars, RequiresOuterTask, 8754 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 8755 // Fill up the arrays with all the captured variables. 8756 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 8757 MappableExprsHandler::MapValuesArrayTy Pointers; 8758 MappableExprsHandler::MapValuesArrayTy Sizes; 8759 MappableExprsHandler::MapFlagsArrayTy MapTypes; 8760 8761 // Get mappable expression information. 8762 MappableExprsHandler MEHandler(D, CGF); 8763 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 8764 8765 auto RI = CS.getCapturedRecordDecl()->field_begin(); 8766 auto CV = CapturedVars.begin(); 8767 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 8768 CE = CS.capture_end(); 8769 CI != CE; ++CI, ++RI, ++CV) { 8770 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 8771 MappableExprsHandler::MapValuesArrayTy CurPointers; 8772 MappableExprsHandler::MapValuesArrayTy CurSizes; 8773 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 8774 MappableExprsHandler::StructRangeInfoTy PartialStruct; 8775 8776 // VLA sizes are passed to the outlined region by copy and do not have map 8777 // information associated. 8778 if (CI->capturesVariableArrayType()) { 8779 CurBasePointers.push_back(*CV); 8780 CurPointers.push_back(*CV); 8781 CurSizes.push_back(CGF.getTypeSize(RI->getType())); 8782 // Copy to the device as an argument. No need to retrieve it. 8783 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 8784 MappableExprsHandler::OMP_MAP_TARGET_PARAM); 8785 } else { 8786 // If we have any information in the map clause, we use it, otherwise we 8787 // just do a default mapping. 8788 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, 8789 CurSizes, CurMapTypes, PartialStruct); 8790 if (CurBasePointers.empty()) 8791 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, 8792 CurPointers, CurSizes, CurMapTypes); 8793 // Generate correct mapping for variables captured by reference in 8794 // lambdas. 8795 if (CI->capturesVariable()) 8796 MEHandler.generateInfoForLambdaCaptures( 8797 CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes, 8798 CurMapTypes, LambdaPointers); 8799 } 8800 // We expect to have at least an element of information for this capture. 8801 assert(!CurBasePointers.empty() && 8802 "Non-existing map pointer for capture!"); 8803 assert(CurBasePointers.size() == CurPointers.size() && 8804 CurBasePointers.size() == CurSizes.size() && 8805 CurBasePointers.size() == CurMapTypes.size() && 8806 "Inconsistent map information sizes!"); 8807 8808 // If there is an entry in PartialStruct it means we have a struct with 8809 // individual members mapped. Emit an extra combined entry. 8810 if (PartialStruct.Base.isValid()) 8811 MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes, 8812 CurMapTypes, PartialStruct); 8813 8814 // We need to append the results of this capture to what we already have. 8815 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8816 Pointers.append(CurPointers.begin(), CurPointers.end()); 8817 Sizes.append(CurSizes.begin(), CurSizes.end()); 8818 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); 8819 } 8820 // Adjust MEMBER_OF flags for the lambdas captures. 8821 MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers, 8822 Pointers, MapTypes); 8823 // Map other list items in the map clause which are not captured variables 8824 // but "declare target link" global variables. 8825 MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, 8826 MapTypes); 8827 8828 TargetDataInfo Info; 8829 // Fill up the arrays and create the arguments. 8830 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 8831 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 8832 Info.PointersArray, Info.SizesArray, 8833 Info.MapTypesArray, Info); 8834 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 8835 InputInfo.BasePointersArray = 8836 Address(Info.BasePointersArray, CGM.getPointerAlign()); 8837 InputInfo.PointersArray = 8838 Address(Info.PointersArray, CGM.getPointerAlign()); 8839 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 8840 MapTypesArray = Info.MapTypesArray; 8841 if (RequiresOuterTask) 8842 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 8843 else 8844 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 8845 }; 8846 8847 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 8848 CodeGenFunction &CGF, PrePostActionTy &) { 8849 if (RequiresOuterTask) { 8850 CodeGenFunction::OMPTargetDataInfo InputInfo; 8851 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 8852 } else { 8853 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 8854 } 8855 }; 8856 8857 // If we have a target function ID it means that we need to support 8858 // offloading, otherwise, just execute on the host. We need to execute on host 8859 // regardless of the conditional in the if clause if, e.g., the user do not 8860 // specify target triples. 8861 if (OutlinedFnID) { 8862 if (IfCond) { 8863 emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 8864 } else { 8865 RegionCodeGenTy ThenRCG(TargetThenGen); 8866 ThenRCG(CGF); 8867 } 8868 } else { 8869 RegionCodeGenTy ElseRCG(TargetElseGen); 8870 ElseRCG(CGF); 8871 } 8872 } 8873 8874 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 8875 StringRef ParentName) { 8876 if (!S) 8877 return; 8878 8879 // Codegen OMP target directives that offload compute to the device. 8880 bool RequiresDeviceCodegen = 8881 isa<OMPExecutableDirective>(S) && 8882 isOpenMPTargetExecutionDirective( 8883 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 8884 8885 if (RequiresDeviceCodegen) { 8886 const auto &E = *cast<OMPExecutableDirective>(S); 8887 unsigned DeviceID; 8888 unsigned FileID; 8889 unsigned Line; 8890 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 8891 FileID, Line); 8892 8893 // Is this a target region that should not be emitted as an entry point? If 8894 // so just signal we are done with this target region. 8895 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 8896 ParentName, Line)) 8897 return; 8898 8899 switch (E.getDirectiveKind()) { 8900 case OMPD_target: 8901 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 8902 cast<OMPTargetDirective>(E)); 8903 break; 8904 case OMPD_target_parallel: 8905 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 8906 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 8907 break; 8908 case OMPD_target_teams: 8909 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 8910 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 8911 break; 8912 case OMPD_target_teams_distribute: 8913 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 8914 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 8915 break; 8916 case OMPD_target_teams_distribute_simd: 8917 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 8918 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 8919 break; 8920 case OMPD_target_parallel_for: 8921 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 8922 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 8923 break; 8924 case OMPD_target_parallel_for_simd: 8925 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 8926 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 8927 break; 8928 case OMPD_target_simd: 8929 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 8930 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 8931 break; 8932 case OMPD_target_teams_distribute_parallel_for: 8933 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 8934 CGM, ParentName, 8935 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 8936 break; 8937 case OMPD_target_teams_distribute_parallel_for_simd: 8938 CodeGenFunction:: 8939 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 8940 CGM, ParentName, 8941 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 8942 break; 8943 case OMPD_parallel: 8944 case OMPD_for: 8945 case OMPD_parallel_for: 8946 case OMPD_parallel_sections: 8947 case OMPD_for_simd: 8948 case OMPD_parallel_for_simd: 8949 case OMPD_cancel: 8950 case OMPD_cancellation_point: 8951 case OMPD_ordered: 8952 case OMPD_threadprivate: 8953 case OMPD_allocate: 8954 case OMPD_task: 8955 case OMPD_simd: 8956 case OMPD_sections: 8957 case OMPD_section: 8958 case OMPD_single: 8959 case OMPD_master: 8960 case OMPD_critical: 8961 case OMPD_taskyield: 8962 case OMPD_barrier: 8963 case OMPD_taskwait: 8964 case OMPD_taskgroup: 8965 case OMPD_atomic: 8966 case OMPD_flush: 8967 case OMPD_teams: 8968 case OMPD_target_data: 8969 case OMPD_target_exit_data: 8970 case OMPD_target_enter_data: 8971 case OMPD_distribute: 8972 case OMPD_distribute_simd: 8973 case OMPD_distribute_parallel_for: 8974 case OMPD_distribute_parallel_for_simd: 8975 case OMPD_teams_distribute: 8976 case OMPD_teams_distribute_simd: 8977 case OMPD_teams_distribute_parallel_for: 8978 case OMPD_teams_distribute_parallel_for_simd: 8979 case OMPD_target_update: 8980 case OMPD_declare_simd: 8981 case OMPD_declare_target: 8982 case OMPD_end_declare_target: 8983 case OMPD_declare_reduction: 8984 case OMPD_declare_mapper: 8985 case OMPD_taskloop: 8986 case OMPD_taskloop_simd: 8987 case OMPD_requires: 8988 case OMPD_unknown: 8989 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 8990 } 8991 return; 8992 } 8993 8994 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 8995 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 8996 return; 8997 8998 scanForTargetRegionsFunctions( 8999 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 9000 return; 9001 } 9002 9003 // If this is a lambda function, look into its body. 9004 if (const auto *L = dyn_cast<LambdaExpr>(S)) 9005 S = L->getBody(); 9006 9007 // Keep looking for target regions recursively. 9008 for (const Stmt *II : S->children()) 9009 scanForTargetRegionsFunctions(II, ParentName); 9010 } 9011 9012 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 9013 // If emitting code for the host, we do not process FD here. Instead we do 9014 // the normal code generation. 9015 if (!CGM.getLangOpts().OpenMPIsDevice) 9016 return false; 9017 9018 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 9019 StringRef Name = CGM.getMangledName(GD); 9020 // Try to detect target regions in the function. 9021 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) 9022 scanForTargetRegionsFunctions(FD->getBody(), Name); 9023 9024 // Do not to emit function if it is not marked as declare target. 9025 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 9026 AlreadyEmittedTargetFunctions.count(Name) == 0; 9027 } 9028 9029 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 9030 if (!CGM.getLangOpts().OpenMPIsDevice) 9031 return false; 9032 9033 // Check if there are Ctors/Dtors in this declaration and look for target 9034 // regions in it. We use the complete variant to produce the kernel name 9035 // mangling. 9036 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 9037 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 9038 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 9039 StringRef ParentName = 9040 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 9041 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 9042 } 9043 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 9044 StringRef ParentName = 9045 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 9046 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 9047 } 9048 } 9049 9050 // Do not to emit variable if it is not marked as declare target. 9051 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9052 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 9053 cast<VarDecl>(GD.getDecl())); 9054 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) { 9055 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 9056 return true; 9057 } 9058 return false; 9059 } 9060 9061 llvm::Constant * 9062 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 9063 const VarDecl *VD) { 9064 assert(VD->getType().isConstant(CGM.getContext()) && 9065 "Expected constant variable."); 9066 StringRef VarName; 9067 llvm::Constant *Addr; 9068 llvm::GlobalValue::LinkageTypes Linkage; 9069 QualType Ty = VD->getType(); 9070 SmallString<128> Buffer; 9071 { 9072 unsigned DeviceID; 9073 unsigned FileID; 9074 unsigned Line; 9075 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 9076 FileID, Line); 9077 llvm::raw_svector_ostream OS(Buffer); 9078 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 9079 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 9080 VarName = OS.str(); 9081 } 9082 Linkage = llvm::GlobalValue::InternalLinkage; 9083 Addr = 9084 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 9085 getDefaultFirstprivateAddressSpace()); 9086 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 9087 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 9088 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 9089 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9090 VarName, Addr, VarSize, 9091 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 9092 return Addr; 9093 } 9094 9095 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 9096 llvm::Constant *Addr) { 9097 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9098 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9099 if (!Res) { 9100 if (CGM.getLangOpts().OpenMPIsDevice) { 9101 // Register non-target variables being emitted in device code (debug info 9102 // may cause this). 9103 StringRef VarName = CGM.getMangledName(VD); 9104 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9105 } 9106 return; 9107 } 9108 // Register declare target variables. 9109 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 9110 StringRef VarName; 9111 CharUnits VarSize; 9112 llvm::GlobalValue::LinkageTypes Linkage; 9113 switch (*Res) { 9114 case OMPDeclareTargetDeclAttr::MT_To: 9115 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9116 VarName = CGM.getMangledName(VD); 9117 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 9118 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 9119 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 9120 } else { 9121 VarSize = CharUnits::Zero(); 9122 } 9123 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 9124 // Temp solution to prevent optimizations of the internal variables. 9125 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 9126 std::string RefName = getName({VarName, "ref"}); 9127 if (!CGM.GetGlobalValue(RefName)) { 9128 llvm::Constant *AddrRef = 9129 getOrCreateInternalVariable(Addr->getType(), RefName); 9130 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 9131 GVAddrRef->setConstant(/*Val=*/true); 9132 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 9133 GVAddrRef->setInitializer(Addr); 9134 CGM.addCompilerUsedGlobal(GVAddrRef); 9135 } 9136 } 9137 break; 9138 case OMPDeclareTargetDeclAttr::MT_Link: 9139 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 9140 if (CGM.getLangOpts().OpenMPIsDevice) { 9141 VarName = Addr->getName(); 9142 Addr = nullptr; 9143 } else { 9144 VarName = getAddrOfDeclareTargetLink(VD).getName(); 9145 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer()); 9146 } 9147 VarSize = CGM.getPointerSize(); 9148 Linkage = llvm::GlobalValue::WeakAnyLinkage; 9149 break; 9150 } 9151 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9152 VarName, Addr, VarSize, Flags, Linkage); 9153 } 9154 9155 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 9156 if (isa<FunctionDecl>(GD.getDecl()) || 9157 isa<OMPDeclareReductionDecl>(GD.getDecl())) 9158 return emitTargetFunctions(GD); 9159 9160 return emitTargetGlobalVariable(GD); 9161 } 9162 9163 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 9164 for (const VarDecl *VD : DeferredGlobalVariables) { 9165 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9166 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9167 if (!Res) 9168 continue; 9169 if (*Res == OMPDeclareTargetDeclAttr::MT_To) { 9170 CGM.EmitGlobal(VD); 9171 } else { 9172 assert(*Res == OMPDeclareTargetDeclAttr::MT_Link && 9173 "Expected to or link clauses."); 9174 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); 9175 } 9176 } 9177 } 9178 9179 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 9180 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 9181 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 9182 " Expected target-based directive."); 9183 } 9184 9185 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 9186 LangAS &AS) { 9187 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 9188 return false; 9189 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 9190 switch(A->getAllocatorType()) { 9191 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 9192 // Not supported, fallback to the default mem space. 9193 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 9194 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 9195 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 9196 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 9197 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 9198 case OMPAllocateDeclAttr::OMPConstMemAlloc: 9199 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 9200 AS = LangAS::Default; 9201 return true; 9202 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 9203 llvm_unreachable("Expected predefined allocator for the variables with the " 9204 "static storage."); 9205 } 9206 return false; 9207 } 9208 9209 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 9210 CodeGenModule &CGM) 9211 : CGM(CGM) { 9212 if (CGM.getLangOpts().OpenMPIsDevice) { 9213 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 9214 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 9215 } 9216 } 9217 9218 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 9219 if (CGM.getLangOpts().OpenMPIsDevice) 9220 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 9221 } 9222 9223 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 9224 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 9225 return true; 9226 9227 StringRef Name = CGM.getMangledName(GD); 9228 const auto *D = cast<FunctionDecl>(GD.getDecl()); 9229 // Do not to emit function if it is marked as declare target as it was already 9230 // emitted. 9231 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 9232 if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) { 9233 if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name))) 9234 return !F->isDeclaration(); 9235 return false; 9236 } 9237 return true; 9238 } 9239 9240 return !AlreadyEmittedTargetFunctions.insert(Name).second; 9241 } 9242 9243 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { 9244 // If we have offloading in the current module, we need to emit the entries 9245 // now and register the offloading descriptor. 9246 createOffloadEntriesAndInfoMetadata(); 9247 9248 // Create and register the offloading binary descriptors. This is the main 9249 // entity that captures all the information about offloading in the current 9250 // compilation unit. 9251 return createOffloadingBinaryDescriptorRegistration(); 9252 } 9253 9254 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 9255 const OMPExecutableDirective &D, 9256 SourceLocation Loc, 9257 llvm::Function *OutlinedFn, 9258 ArrayRef<llvm::Value *> CapturedVars) { 9259 if (!CGF.HaveInsertPoint()) 9260 return; 9261 9262 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 9263 CodeGenFunction::RunCleanupsScope Scope(CGF); 9264 9265 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 9266 llvm::Value *Args[] = { 9267 RTLoc, 9268 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 9269 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 9270 llvm::SmallVector<llvm::Value *, 16> RealArgs; 9271 RealArgs.append(std::begin(Args), std::end(Args)); 9272 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 9273 9274 llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 9275 CGF.EmitRuntimeCall(RTLFn, RealArgs); 9276 } 9277 9278 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 9279 const Expr *NumTeams, 9280 const Expr *ThreadLimit, 9281 SourceLocation Loc) { 9282 if (!CGF.HaveInsertPoint()) 9283 return; 9284 9285 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 9286 9287 llvm::Value *NumTeamsVal = 9288 NumTeams 9289 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 9290 CGF.CGM.Int32Ty, /* isSigned = */ true) 9291 : CGF.Builder.getInt32(0); 9292 9293 llvm::Value *ThreadLimitVal = 9294 ThreadLimit 9295 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 9296 CGF.CGM.Int32Ty, /* isSigned = */ true) 9297 : CGF.Builder.getInt32(0); 9298 9299 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 9300 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 9301 ThreadLimitVal}; 9302 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 9303 PushNumTeamsArgs); 9304 } 9305 9306 void CGOpenMPRuntime::emitTargetDataCalls( 9307 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 9308 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 9309 if (!CGF.HaveInsertPoint()) 9310 return; 9311 9312 // Action used to replace the default codegen action and turn privatization 9313 // off. 9314 PrePostActionTy NoPrivAction; 9315 9316 // Generate the code for the opening of the data environment. Capture all the 9317 // arguments of the runtime call by reference because they are used in the 9318 // closing of the region. 9319 auto &&BeginThenGen = [this, &D, Device, &Info, 9320 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 9321 // Fill up the arrays with all the mapped variables. 9322 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9323 MappableExprsHandler::MapValuesArrayTy Pointers; 9324 MappableExprsHandler::MapValuesArrayTy Sizes; 9325 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9326 9327 // Get map clause information. 9328 MappableExprsHandler MCHandler(D, CGF); 9329 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 9330 9331 // Fill up the arrays and create the arguments. 9332 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 9333 9334 llvm::Value *BasePointersArrayArg = nullptr; 9335 llvm::Value *PointersArrayArg = nullptr; 9336 llvm::Value *SizesArrayArg = nullptr; 9337 llvm::Value *MapTypesArrayArg = nullptr; 9338 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 9339 SizesArrayArg, MapTypesArrayArg, Info); 9340 9341 // Emit device ID if any. 9342 llvm::Value *DeviceID = nullptr; 9343 if (Device) { 9344 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9345 CGF.Int64Ty, /*isSigned=*/true); 9346 } else { 9347 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9348 } 9349 9350 // Emit the number of elements in the offloading arrays. 9351 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 9352 9353 llvm::Value *OffloadingArgs[] = { 9354 DeviceID, PointerNum, BasePointersArrayArg, 9355 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 9356 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), 9357 OffloadingArgs); 9358 9359 // If device pointer privatization is required, emit the body of the region 9360 // here. It will have to be duplicated: with and without privatization. 9361 if (!Info.CaptureDeviceAddrMap.empty()) 9362 CodeGen(CGF); 9363 }; 9364 9365 // Generate code for the closing of the data region. 9366 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 9367 PrePostActionTy &) { 9368 assert(Info.isValid() && "Invalid data environment closing arguments."); 9369 9370 llvm::Value *BasePointersArrayArg = nullptr; 9371 llvm::Value *PointersArrayArg = nullptr; 9372 llvm::Value *SizesArrayArg = nullptr; 9373 llvm::Value *MapTypesArrayArg = nullptr; 9374 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 9375 SizesArrayArg, MapTypesArrayArg, Info); 9376 9377 // Emit device ID if any. 9378 llvm::Value *DeviceID = nullptr; 9379 if (Device) { 9380 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9381 CGF.Int64Ty, /*isSigned=*/true); 9382 } else { 9383 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9384 } 9385 9386 // Emit the number of elements in the offloading arrays. 9387 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 9388 9389 llvm::Value *OffloadingArgs[] = { 9390 DeviceID, PointerNum, BasePointersArrayArg, 9391 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 9392 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), 9393 OffloadingArgs); 9394 }; 9395 9396 // If we need device pointer privatization, we need to emit the body of the 9397 // region with no privatization in the 'else' branch of the conditional. 9398 // Otherwise, we don't have to do anything. 9399 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 9400 PrePostActionTy &) { 9401 if (!Info.CaptureDeviceAddrMap.empty()) { 9402 CodeGen.setAction(NoPrivAction); 9403 CodeGen(CGF); 9404 } 9405 }; 9406 9407 // We don't have to do anything to close the region if the if clause evaluates 9408 // to false. 9409 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 9410 9411 if (IfCond) { 9412 emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 9413 } else { 9414 RegionCodeGenTy RCG(BeginThenGen); 9415 RCG(CGF); 9416 } 9417 9418 // If we don't require privatization of device pointers, we emit the body in 9419 // between the runtime calls. This avoids duplicating the body code. 9420 if (Info.CaptureDeviceAddrMap.empty()) { 9421 CodeGen.setAction(NoPrivAction); 9422 CodeGen(CGF); 9423 } 9424 9425 if (IfCond) { 9426 emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen); 9427 } else { 9428 RegionCodeGenTy RCG(EndThenGen); 9429 RCG(CGF); 9430 } 9431 } 9432 9433 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 9434 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 9435 const Expr *Device) { 9436 if (!CGF.HaveInsertPoint()) 9437 return; 9438 9439 assert((isa<OMPTargetEnterDataDirective>(D) || 9440 isa<OMPTargetExitDataDirective>(D) || 9441 isa<OMPTargetUpdateDirective>(D)) && 9442 "Expecting either target enter, exit data, or update directives."); 9443 9444 CodeGenFunction::OMPTargetDataInfo InputInfo; 9445 llvm::Value *MapTypesArray = nullptr; 9446 // Generate the code for the opening of the data environment. 9447 auto &&ThenGen = [this, &D, Device, &InputInfo, 9448 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 9449 // Emit device ID if any. 9450 llvm::Value *DeviceID = nullptr; 9451 if (Device) { 9452 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9453 CGF.Int64Ty, /*isSigned=*/true); 9454 } else { 9455 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9456 } 9457 9458 // Emit the number of elements in the offloading arrays. 9459 llvm::Constant *PointerNum = 9460 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 9461 9462 llvm::Value *OffloadingArgs[] = {DeviceID, 9463 PointerNum, 9464 InputInfo.BasePointersArray.getPointer(), 9465 InputInfo.PointersArray.getPointer(), 9466 InputInfo.SizesArray.getPointer(), 9467 MapTypesArray}; 9468 9469 // Select the right runtime function call for each expected standalone 9470 // directive. 9471 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 9472 OpenMPRTLFunction RTLFn; 9473 switch (D.getDirectiveKind()) { 9474 case OMPD_target_enter_data: 9475 RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait 9476 : OMPRTL__tgt_target_data_begin; 9477 break; 9478 case OMPD_target_exit_data: 9479 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait 9480 : OMPRTL__tgt_target_data_end; 9481 break; 9482 case OMPD_target_update: 9483 RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait 9484 : OMPRTL__tgt_target_data_update; 9485 break; 9486 case OMPD_parallel: 9487 case OMPD_for: 9488 case OMPD_parallel_for: 9489 case OMPD_parallel_sections: 9490 case OMPD_for_simd: 9491 case OMPD_parallel_for_simd: 9492 case OMPD_cancel: 9493 case OMPD_cancellation_point: 9494 case OMPD_ordered: 9495 case OMPD_threadprivate: 9496 case OMPD_allocate: 9497 case OMPD_task: 9498 case OMPD_simd: 9499 case OMPD_sections: 9500 case OMPD_section: 9501 case OMPD_single: 9502 case OMPD_master: 9503 case OMPD_critical: 9504 case OMPD_taskyield: 9505 case OMPD_barrier: 9506 case OMPD_taskwait: 9507 case OMPD_taskgroup: 9508 case OMPD_atomic: 9509 case OMPD_flush: 9510 case OMPD_teams: 9511 case OMPD_target_data: 9512 case OMPD_distribute: 9513 case OMPD_distribute_simd: 9514 case OMPD_distribute_parallel_for: 9515 case OMPD_distribute_parallel_for_simd: 9516 case OMPD_teams_distribute: 9517 case OMPD_teams_distribute_simd: 9518 case OMPD_teams_distribute_parallel_for: 9519 case OMPD_teams_distribute_parallel_for_simd: 9520 case OMPD_declare_simd: 9521 case OMPD_declare_target: 9522 case OMPD_end_declare_target: 9523 case OMPD_declare_reduction: 9524 case OMPD_declare_mapper: 9525 case OMPD_taskloop: 9526 case OMPD_taskloop_simd: 9527 case OMPD_target: 9528 case OMPD_target_simd: 9529 case OMPD_target_teams_distribute: 9530 case OMPD_target_teams_distribute_simd: 9531 case OMPD_target_teams_distribute_parallel_for: 9532 case OMPD_target_teams_distribute_parallel_for_simd: 9533 case OMPD_target_teams: 9534 case OMPD_target_parallel: 9535 case OMPD_target_parallel_for: 9536 case OMPD_target_parallel_for_simd: 9537 case OMPD_requires: 9538 case OMPD_unknown: 9539 llvm_unreachable("Unexpected standalone target data directive."); 9540 break; 9541 } 9542 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); 9543 }; 9544 9545 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 9546 CodeGenFunction &CGF, PrePostActionTy &) { 9547 // Fill up the arrays with all the mapped variables. 9548 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9549 MappableExprsHandler::MapValuesArrayTy Pointers; 9550 MappableExprsHandler::MapValuesArrayTy Sizes; 9551 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9552 9553 // Get map clause information. 9554 MappableExprsHandler MEHandler(D, CGF); 9555 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 9556 9557 TargetDataInfo Info; 9558 // Fill up the arrays and create the arguments. 9559 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 9560 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 9561 Info.PointersArray, Info.SizesArray, 9562 Info.MapTypesArray, Info); 9563 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 9564 InputInfo.BasePointersArray = 9565 Address(Info.BasePointersArray, CGM.getPointerAlign()); 9566 InputInfo.PointersArray = 9567 Address(Info.PointersArray, CGM.getPointerAlign()); 9568 InputInfo.SizesArray = 9569 Address(Info.SizesArray, CGM.getPointerAlign()); 9570 MapTypesArray = Info.MapTypesArray; 9571 if (D.hasClausesOfKind<OMPDependClause>()) 9572 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 9573 else 9574 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 9575 }; 9576 9577 if (IfCond) { 9578 emitOMPIfClause(CGF, IfCond, TargetThenGen, 9579 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 9580 } else { 9581 RegionCodeGenTy ThenRCG(TargetThenGen); 9582 ThenRCG(CGF); 9583 } 9584 } 9585 9586 namespace { 9587 /// Kind of parameter in a function with 'declare simd' directive. 9588 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 9589 /// Attribute set of the parameter. 9590 struct ParamAttrTy { 9591 ParamKindTy Kind = Vector; 9592 llvm::APSInt StrideOrArg; 9593 llvm::APSInt Alignment; 9594 }; 9595 } // namespace 9596 9597 static unsigned evaluateCDTSize(const FunctionDecl *FD, 9598 ArrayRef<ParamAttrTy> ParamAttrs) { 9599 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 9600 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 9601 // of that clause. The VLEN value must be power of 2. 9602 // In other case the notion of the function`s "characteristic data type" (CDT) 9603 // is used to compute the vector length. 9604 // CDT is defined in the following order: 9605 // a) For non-void function, the CDT is the return type. 9606 // b) If the function has any non-uniform, non-linear parameters, then the 9607 // CDT is the type of the first such parameter. 9608 // c) If the CDT determined by a) or b) above is struct, union, or class 9609 // type which is pass-by-value (except for the type that maps to the 9610 // built-in complex data type), the characteristic data type is int. 9611 // d) If none of the above three cases is applicable, the CDT is int. 9612 // The VLEN is then determined based on the CDT and the size of vector 9613 // register of that ISA for which current vector version is generated. The 9614 // VLEN is computed using the formula below: 9615 // VLEN = sizeof(vector_register) / sizeof(CDT), 9616 // where vector register size specified in section 3.2.1 Registers and the 9617 // Stack Frame of original AMD64 ABI document. 9618 QualType RetType = FD->getReturnType(); 9619 if (RetType.isNull()) 9620 return 0; 9621 ASTContext &C = FD->getASTContext(); 9622 QualType CDT; 9623 if (!RetType.isNull() && !RetType->isVoidType()) { 9624 CDT = RetType; 9625 } else { 9626 unsigned Offset = 0; 9627 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 9628 if (ParamAttrs[Offset].Kind == Vector) 9629 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 9630 ++Offset; 9631 } 9632 if (CDT.isNull()) { 9633 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 9634 if (ParamAttrs[I + Offset].Kind == Vector) { 9635 CDT = FD->getParamDecl(I)->getType(); 9636 break; 9637 } 9638 } 9639 } 9640 } 9641 if (CDT.isNull()) 9642 CDT = C.IntTy; 9643 CDT = CDT->getCanonicalTypeUnqualified(); 9644 if (CDT->isRecordType() || CDT->isUnionType()) 9645 CDT = C.IntTy; 9646 return C.getTypeSize(CDT); 9647 } 9648 9649 static void 9650 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 9651 const llvm::APSInt &VLENVal, 9652 ArrayRef<ParamAttrTy> ParamAttrs, 9653 OMPDeclareSimdDeclAttr::BranchStateTy State) { 9654 struct ISADataTy { 9655 char ISA; 9656 unsigned VecRegSize; 9657 }; 9658 ISADataTy ISAData[] = { 9659 { 9660 'b', 128 9661 }, // SSE 9662 { 9663 'c', 256 9664 }, // AVX 9665 { 9666 'd', 256 9667 }, // AVX2 9668 { 9669 'e', 512 9670 }, // AVX512 9671 }; 9672 llvm::SmallVector<char, 2> Masked; 9673 switch (State) { 9674 case OMPDeclareSimdDeclAttr::BS_Undefined: 9675 Masked.push_back('N'); 9676 Masked.push_back('M'); 9677 break; 9678 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 9679 Masked.push_back('N'); 9680 break; 9681 case OMPDeclareSimdDeclAttr::BS_Inbranch: 9682 Masked.push_back('M'); 9683 break; 9684 } 9685 for (char Mask : Masked) { 9686 for (const ISADataTy &Data : ISAData) { 9687 SmallString<256> Buffer; 9688 llvm::raw_svector_ostream Out(Buffer); 9689 Out << "_ZGV" << Data.ISA << Mask; 9690 if (!VLENVal) { 9691 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / 9692 evaluateCDTSize(FD, ParamAttrs)); 9693 } else { 9694 Out << VLENVal; 9695 } 9696 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 9697 switch (ParamAttr.Kind){ 9698 case LinearWithVarStride: 9699 Out << 's' << ParamAttr.StrideOrArg; 9700 break; 9701 case Linear: 9702 Out << 'l'; 9703 if (!!ParamAttr.StrideOrArg) 9704 Out << ParamAttr.StrideOrArg; 9705 break; 9706 case Uniform: 9707 Out << 'u'; 9708 break; 9709 case Vector: 9710 Out << 'v'; 9711 break; 9712 } 9713 if (!!ParamAttr.Alignment) 9714 Out << 'a' << ParamAttr.Alignment; 9715 } 9716 Out << '_' << Fn->getName(); 9717 Fn->addFnAttr(Out.str()); 9718 } 9719 } 9720 } 9721 9722 // This are the Functions that are needed to mangle the name of the 9723 // vector functions generated by the compiler, according to the rules 9724 // defined in the "Vector Function ABI specifications for AArch64", 9725 // available at 9726 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 9727 9728 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 9729 /// 9730 /// TODO: Need to implement the behavior for reference marked with a 9731 /// var or no linear modifiers (1.b in the section). For this, we 9732 /// need to extend ParamKindTy to support the linear modifiers. 9733 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 9734 QT = QT.getCanonicalType(); 9735 9736 if (QT->isVoidType()) 9737 return false; 9738 9739 if (Kind == ParamKindTy::Uniform) 9740 return false; 9741 9742 if (Kind == ParamKindTy::Linear) 9743 return false; 9744 9745 // TODO: Handle linear references with modifiers 9746 9747 if (Kind == ParamKindTy::LinearWithVarStride) 9748 return false; 9749 9750 return true; 9751 } 9752 9753 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 9754 static bool getAArch64PBV(QualType QT, ASTContext &C) { 9755 QT = QT.getCanonicalType(); 9756 unsigned Size = C.getTypeSize(QT); 9757 9758 // Only scalars and complex within 16 bytes wide set PVB to true. 9759 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 9760 return false; 9761 9762 if (QT->isFloatingType()) 9763 return true; 9764 9765 if (QT->isIntegerType()) 9766 return true; 9767 9768 if (QT->isPointerType()) 9769 return true; 9770 9771 // TODO: Add support for complex types (section 3.1.2, item 2). 9772 9773 return false; 9774 } 9775 9776 /// Computes the lane size (LS) of a return type or of an input parameter, 9777 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 9778 /// TODO: Add support for references, section 3.2.1, item 1. 9779 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 9780 if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 9781 QualType PTy = QT.getCanonicalType()->getPointeeType(); 9782 if (getAArch64PBV(PTy, C)) 9783 return C.getTypeSize(PTy); 9784 } 9785 if (getAArch64PBV(QT, C)) 9786 return C.getTypeSize(QT); 9787 9788 return C.getTypeSize(C.getUIntPtrType()); 9789 } 9790 9791 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 9792 // signature of the scalar function, as defined in 3.2.2 of the 9793 // AAVFABI. 9794 static std::tuple<unsigned, unsigned, bool> 9795 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 9796 QualType RetType = FD->getReturnType().getCanonicalType(); 9797 9798 ASTContext &C = FD->getASTContext(); 9799 9800 bool OutputBecomesInput = false; 9801 9802 llvm::SmallVector<unsigned, 8> Sizes; 9803 if (!RetType->isVoidType()) { 9804 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 9805 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 9806 OutputBecomesInput = true; 9807 } 9808 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 9809 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 9810 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 9811 } 9812 9813 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 9814 // The LS of a function parameter / return value can only be a power 9815 // of 2, starting from 8 bits, up to 128. 9816 assert(std::all_of(Sizes.begin(), Sizes.end(), 9817 [](unsigned Size) { 9818 return Size == 8 || Size == 16 || Size == 32 || 9819 Size == 64 || Size == 128; 9820 }) && 9821 "Invalid size"); 9822 9823 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 9824 *std::max_element(std::begin(Sizes), std::end(Sizes)), 9825 OutputBecomesInput); 9826 } 9827 9828 /// Mangle the parameter part of the vector function name according to 9829 /// their OpenMP classification. The mangling function is defined in 9830 /// section 3.5 of the AAVFABI. 9831 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 9832 SmallString<256> Buffer; 9833 llvm::raw_svector_ostream Out(Buffer); 9834 for (const auto &ParamAttr : ParamAttrs) { 9835 switch (ParamAttr.Kind) { 9836 case LinearWithVarStride: 9837 Out << "ls" << ParamAttr.StrideOrArg; 9838 break; 9839 case Linear: 9840 Out << 'l'; 9841 // Don't print the step value if it is not present or if it is 9842 // equal to 1. 9843 if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1) 9844 Out << ParamAttr.StrideOrArg; 9845 break; 9846 case Uniform: 9847 Out << 'u'; 9848 break; 9849 case Vector: 9850 Out << 'v'; 9851 break; 9852 } 9853 9854 if (!!ParamAttr.Alignment) 9855 Out << 'a' << ParamAttr.Alignment; 9856 } 9857 9858 return Out.str(); 9859 } 9860 9861 // Function used to add the attribute. The parameter `VLEN` is 9862 // templated to allow the use of "x" when targeting scalable functions 9863 // for SVE. 9864 template <typename T> 9865 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 9866 char ISA, StringRef ParSeq, 9867 StringRef MangledName, bool OutputBecomesInput, 9868 llvm::Function *Fn) { 9869 SmallString<256> Buffer; 9870 llvm::raw_svector_ostream Out(Buffer); 9871 Out << Prefix << ISA << LMask << VLEN; 9872 if (OutputBecomesInput) 9873 Out << "v"; 9874 Out << ParSeq << "_" << MangledName; 9875 Fn->addFnAttr(Out.str()); 9876 } 9877 9878 // Helper function to generate the Advanced SIMD names depending on 9879 // the value of the NDS when simdlen is not present. 9880 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 9881 StringRef Prefix, char ISA, 9882 StringRef ParSeq, StringRef MangledName, 9883 bool OutputBecomesInput, 9884 llvm::Function *Fn) { 9885 switch (NDS) { 9886 case 8: 9887 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 9888 OutputBecomesInput, Fn); 9889 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 9890 OutputBecomesInput, Fn); 9891 break; 9892 case 16: 9893 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 9894 OutputBecomesInput, Fn); 9895 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 9896 OutputBecomesInput, Fn); 9897 break; 9898 case 32: 9899 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 9900 OutputBecomesInput, Fn); 9901 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 9902 OutputBecomesInput, Fn); 9903 break; 9904 case 64: 9905 case 128: 9906 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 9907 OutputBecomesInput, Fn); 9908 break; 9909 default: 9910 llvm_unreachable("Scalar type is too wide."); 9911 } 9912 } 9913 9914 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 9915 static void emitAArch64DeclareSimdFunction( 9916 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 9917 ArrayRef<ParamAttrTy> ParamAttrs, 9918 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 9919 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 9920 9921 // Get basic data for building the vector signature. 9922 const auto Data = getNDSWDS(FD, ParamAttrs); 9923 const unsigned NDS = std::get<0>(Data); 9924 const unsigned WDS = std::get<1>(Data); 9925 const bool OutputBecomesInput = std::get<2>(Data); 9926 9927 // Check the values provided via `simdlen` by the user. 9928 // 1. A `simdlen(1)` doesn't produce vector signatures, 9929 if (UserVLEN == 1) { 9930 unsigned DiagID = CGM.getDiags().getCustomDiagID( 9931 DiagnosticsEngine::Warning, 9932 "The clause simdlen(1) has no effect when targeting aarch64."); 9933 CGM.getDiags().Report(SLoc, DiagID); 9934 return; 9935 } 9936 9937 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 9938 // Advanced SIMD output. 9939 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 9940 unsigned DiagID = CGM.getDiags().getCustomDiagID( 9941 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 9942 "power of 2 when targeting Advanced SIMD."); 9943 CGM.getDiags().Report(SLoc, DiagID); 9944 return; 9945 } 9946 9947 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 9948 // limits. 9949 if (ISA == 's' && UserVLEN != 0) { 9950 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 9951 unsigned DiagID = CGM.getDiags().getCustomDiagID( 9952 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 9953 "lanes in the architectural constraints " 9954 "for SVE (min is 128-bit, max is " 9955 "2048-bit, by steps of 128-bit)"); 9956 CGM.getDiags().Report(SLoc, DiagID) << WDS; 9957 return; 9958 } 9959 } 9960 9961 // Sort out parameter sequence. 9962 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 9963 StringRef Prefix = "_ZGV"; 9964 // Generate simdlen from user input (if any). 9965 if (UserVLEN) { 9966 if (ISA == 's') { 9967 // SVE generates only a masked function. 9968 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 9969 OutputBecomesInput, Fn); 9970 } else { 9971 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 9972 // Advanced SIMD generates one or two functions, depending on 9973 // the `[not]inbranch` clause. 9974 switch (State) { 9975 case OMPDeclareSimdDeclAttr::BS_Undefined: 9976 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 9977 OutputBecomesInput, Fn); 9978 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 9979 OutputBecomesInput, Fn); 9980 break; 9981 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 9982 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 9983 OutputBecomesInput, Fn); 9984 break; 9985 case OMPDeclareSimdDeclAttr::BS_Inbranch: 9986 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 9987 OutputBecomesInput, Fn); 9988 break; 9989 } 9990 } 9991 } else { 9992 // If no user simdlen is provided, follow the AAVFABI rules for 9993 // generating the vector length. 9994 if (ISA == 's') { 9995 // SVE, section 3.4.1, item 1. 9996 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 9997 OutputBecomesInput, Fn); 9998 } else { 9999 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10000 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10001 // two vector names depending on the use of the clause 10002 // `[not]inbranch`. 10003 switch (State) { 10004 case OMPDeclareSimdDeclAttr::BS_Undefined: 10005 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10006 OutputBecomesInput, Fn); 10007 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10008 OutputBecomesInput, Fn); 10009 break; 10010 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10011 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10012 OutputBecomesInput, Fn); 10013 break; 10014 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10015 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10016 OutputBecomesInput, Fn); 10017 break; 10018 } 10019 } 10020 } 10021 } 10022 10023 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10024 llvm::Function *Fn) { 10025 ASTContext &C = CGM.getContext(); 10026 FD = FD->getMostRecentDecl(); 10027 // Map params to their positions in function decl. 10028 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10029 if (isa<CXXMethodDecl>(FD)) 10030 ParamPositions.try_emplace(FD, 0); 10031 unsigned ParamPos = ParamPositions.size(); 10032 for (const ParmVarDecl *P : FD->parameters()) { 10033 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10034 ++ParamPos; 10035 } 10036 while (FD) { 10037 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10038 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10039 // Mark uniform parameters. 10040 for (const Expr *E : Attr->uniforms()) { 10041 E = E->IgnoreParenImpCasts(); 10042 unsigned Pos; 10043 if (isa<CXXThisExpr>(E)) { 10044 Pos = ParamPositions[FD]; 10045 } else { 10046 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10047 ->getCanonicalDecl(); 10048 Pos = ParamPositions[PVD]; 10049 } 10050 ParamAttrs[Pos].Kind = Uniform; 10051 } 10052 // Get alignment info. 10053 auto NI = Attr->alignments_begin(); 10054 for (const Expr *E : Attr->aligneds()) { 10055 E = E->IgnoreParenImpCasts(); 10056 unsigned Pos; 10057 QualType ParmTy; 10058 if (isa<CXXThisExpr>(E)) { 10059 Pos = ParamPositions[FD]; 10060 ParmTy = E->getType(); 10061 } else { 10062 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10063 ->getCanonicalDecl(); 10064 Pos = ParamPositions[PVD]; 10065 ParmTy = PVD->getType(); 10066 } 10067 ParamAttrs[Pos].Alignment = 10068 (*NI) 10069 ? (*NI)->EvaluateKnownConstInt(C) 10070 : llvm::APSInt::getUnsigned( 10071 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10072 .getQuantity()); 10073 ++NI; 10074 } 10075 // Mark linear parameters. 10076 auto SI = Attr->steps_begin(); 10077 auto MI = Attr->modifiers_begin(); 10078 for (const Expr *E : Attr->linears()) { 10079 E = E->IgnoreParenImpCasts(); 10080 unsigned Pos; 10081 if (isa<CXXThisExpr>(E)) { 10082 Pos = ParamPositions[FD]; 10083 } else { 10084 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10085 ->getCanonicalDecl(); 10086 Pos = ParamPositions[PVD]; 10087 } 10088 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 10089 ParamAttr.Kind = Linear; 10090 if (*SI) { 10091 Expr::EvalResult Result; 10092 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 10093 if (const auto *DRE = 10094 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 10095 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 10096 ParamAttr.Kind = LinearWithVarStride; 10097 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 10098 ParamPositions[StridePVD->getCanonicalDecl()]); 10099 } 10100 } 10101 } else { 10102 ParamAttr.StrideOrArg = Result.Val.getInt(); 10103 } 10104 } 10105 ++SI; 10106 ++MI; 10107 } 10108 llvm::APSInt VLENVal; 10109 SourceLocation ExprLoc; 10110 const Expr *VLENExpr = Attr->getSimdlen(); 10111 if (VLENExpr) { 10112 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 10113 ExprLoc = VLENExpr->getExprLoc(); 10114 } 10115 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 10116 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 10117 CGM.getTriple().getArch() == llvm::Triple::x86_64) { 10118 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 10119 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 10120 unsigned VLEN = VLENVal.getExtValue(); 10121 StringRef MangledName = Fn->getName(); 10122 if (CGM.getTarget().hasFeature("sve")) 10123 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10124 MangledName, 's', 128, Fn, ExprLoc); 10125 if (CGM.getTarget().hasFeature("neon")) 10126 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10127 MangledName, 'n', 128, Fn, ExprLoc); 10128 } 10129 } 10130 FD = FD->getPreviousDecl(); 10131 } 10132 } 10133 10134 namespace { 10135 /// Cleanup action for doacross support. 10136 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 10137 public: 10138 static const int DoacrossFinArgs = 2; 10139 10140 private: 10141 llvm::FunctionCallee RTLFn; 10142 llvm::Value *Args[DoacrossFinArgs]; 10143 10144 public: 10145 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 10146 ArrayRef<llvm::Value *> CallArgs) 10147 : RTLFn(RTLFn) { 10148 assert(CallArgs.size() == DoacrossFinArgs); 10149 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10150 } 10151 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10152 if (!CGF.HaveInsertPoint()) 10153 return; 10154 CGF.EmitRuntimeCall(RTLFn, Args); 10155 } 10156 }; 10157 } // namespace 10158 10159 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 10160 const OMPLoopDirective &D, 10161 ArrayRef<Expr *> NumIterations) { 10162 if (!CGF.HaveInsertPoint()) 10163 return; 10164 10165 ASTContext &C = CGM.getContext(); 10166 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 10167 RecordDecl *RD; 10168 if (KmpDimTy.isNull()) { 10169 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 10170 // kmp_int64 lo; // lower 10171 // kmp_int64 up; // upper 10172 // kmp_int64 st; // stride 10173 // }; 10174 RD = C.buildImplicitRecord("kmp_dim"); 10175 RD->startDefinition(); 10176 addFieldToRecordDecl(C, RD, Int64Ty); 10177 addFieldToRecordDecl(C, RD, Int64Ty); 10178 addFieldToRecordDecl(C, RD, Int64Ty); 10179 RD->completeDefinition(); 10180 KmpDimTy = C.getRecordType(RD); 10181 } else { 10182 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 10183 } 10184 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 10185 QualType ArrayTy = 10186 C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0); 10187 10188 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 10189 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 10190 enum { LowerFD = 0, UpperFD, StrideFD }; 10191 // Fill dims with data. 10192 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 10193 LValue DimsLVal = CGF.MakeAddrLValue( 10194 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 10195 // dims.upper = num_iterations; 10196 LValue UpperLVal = CGF.EmitLValueForField( 10197 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 10198 llvm::Value *NumIterVal = 10199 CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]), 10200 D.getNumIterations()->getType(), Int64Ty, 10201 D.getNumIterations()->getExprLoc()); 10202 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 10203 // dims.stride = 1; 10204 LValue StrideLVal = CGF.EmitLValueForField( 10205 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 10206 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 10207 StrideLVal); 10208 } 10209 10210 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 10211 // kmp_int32 num_dims, struct kmp_dim * dims); 10212 llvm::Value *Args[] = { 10213 emitUpdateLocation(CGF, D.getBeginLoc()), 10214 getThreadID(CGF, D.getBeginLoc()), 10215 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 10216 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 10217 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 10218 CGM.VoidPtrTy)}; 10219 10220 llvm::FunctionCallee RTLFn = 10221 createRuntimeFunction(OMPRTL__kmpc_doacross_init); 10222 CGF.EmitRuntimeCall(RTLFn, Args); 10223 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 10224 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 10225 llvm::FunctionCallee FiniRTLFn = 10226 createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 10227 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 10228 llvm::makeArrayRef(FiniArgs)); 10229 } 10230 10231 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 10232 const OMPDependClause *C) { 10233 QualType Int64Ty = 10234 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 10235 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 10236 QualType ArrayTy = CGM.getContext().getConstantArrayType( 10237 Int64Ty, Size, ArrayType::Normal, 0); 10238 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 10239 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 10240 const Expr *CounterVal = C->getLoopData(I); 10241 assert(CounterVal); 10242 llvm::Value *CntVal = CGF.EmitScalarConversion( 10243 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 10244 CounterVal->getExprLoc()); 10245 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 10246 /*Volatile=*/false, Int64Ty); 10247 } 10248 llvm::Value *Args[] = { 10249 emitUpdateLocation(CGF, C->getBeginLoc()), 10250 getThreadID(CGF, C->getBeginLoc()), 10251 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 10252 llvm::FunctionCallee RTLFn; 10253 if (C->getDependencyKind() == OMPC_DEPEND_source) { 10254 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 10255 } else { 10256 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 10257 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 10258 } 10259 CGF.EmitRuntimeCall(RTLFn, Args); 10260 } 10261 10262 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 10263 llvm::FunctionCallee Callee, 10264 ArrayRef<llvm::Value *> Args) const { 10265 assert(Loc.isValid() && "Outlined function call location must be valid."); 10266 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 10267 10268 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 10269 if (Fn->doesNotThrow()) { 10270 CGF.EmitNounwindRuntimeCall(Fn, Args); 10271 return; 10272 } 10273 } 10274 CGF.EmitRuntimeCall(Callee, Args); 10275 } 10276 10277 void CGOpenMPRuntime::emitOutlinedFunctionCall( 10278 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 10279 ArrayRef<llvm::Value *> Args) const { 10280 emitCall(CGF, Loc, OutlinedFn, Args); 10281 } 10282 10283 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 10284 const VarDecl *NativeParam, 10285 const VarDecl *TargetParam) const { 10286 return CGF.GetAddrOfLocalVar(NativeParam); 10287 } 10288 10289 namespace { 10290 /// Cleanup action for allocate support. 10291 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 10292 public: 10293 static const int CleanupArgs = 3; 10294 10295 private: 10296 llvm::FunctionCallee RTLFn; 10297 llvm::Value *Args[CleanupArgs]; 10298 10299 public: 10300 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 10301 ArrayRef<llvm::Value *> CallArgs) 10302 : RTLFn(RTLFn) { 10303 assert(CallArgs.size() == CleanupArgs && 10304 "Size of arguments does not match."); 10305 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10306 } 10307 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10308 if (!CGF.HaveInsertPoint()) 10309 return; 10310 CGF.EmitRuntimeCall(RTLFn, Args); 10311 } 10312 }; 10313 } // namespace 10314 10315 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 10316 const VarDecl *VD) { 10317 if (!VD) 10318 return Address::invalid(); 10319 const VarDecl *CVD = VD->getCanonicalDecl(); 10320 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 10321 return Address::invalid(); 10322 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 10323 // Use the default allocation. 10324 if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 10325 !AA->getAllocator()) 10326 return Address::invalid(); 10327 llvm::Value *Size; 10328 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 10329 if (CVD->getType()->isVariablyModifiedType()) { 10330 Size = CGF.getTypeSize(CVD->getType()); 10331 // Align the size: ((size + align - 1) / align) * align 10332 Size = CGF.Builder.CreateNUWAdd( 10333 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 10334 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 10335 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 10336 } else { 10337 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 10338 Size = CGM.getSize(Sz.alignTo(Align)); 10339 } 10340 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 10341 assert(AA->getAllocator() && 10342 "Expected allocator expression for non-default allocator."); 10343 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 10344 // According to the standard, the original allocator type is a enum (integer). 10345 // Convert to pointer type, if required. 10346 if (Allocator->getType()->isIntegerTy()) 10347 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 10348 else if (Allocator->getType()->isPointerTy()) 10349 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 10350 CGM.VoidPtrTy); 10351 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 10352 10353 llvm::Value *Addr = 10354 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args, 10355 CVD->getName() + ".void.addr"); 10356 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 10357 Allocator}; 10358 llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free); 10359 10360 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 10361 llvm::makeArrayRef(FiniArgs)); 10362 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 10363 Addr, 10364 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 10365 CVD->getName() + ".addr"); 10366 return Address(Addr, Align); 10367 } 10368 10369 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 10370 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 10371 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 10372 llvm_unreachable("Not supported in SIMD-only mode"); 10373 } 10374 10375 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 10376 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 10377 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 10378 llvm_unreachable("Not supported in SIMD-only mode"); 10379 } 10380 10381 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 10382 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 10383 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 10384 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 10385 bool Tied, unsigned &NumberOfParts) { 10386 llvm_unreachable("Not supported in SIMD-only mode"); 10387 } 10388 10389 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 10390 SourceLocation Loc, 10391 llvm::Function *OutlinedFn, 10392 ArrayRef<llvm::Value *> CapturedVars, 10393 const Expr *IfCond) { 10394 llvm_unreachable("Not supported in SIMD-only mode"); 10395 } 10396 10397 void CGOpenMPSIMDRuntime::emitCriticalRegion( 10398 CodeGenFunction &CGF, StringRef CriticalName, 10399 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 10400 const Expr *Hint) { 10401 llvm_unreachable("Not supported in SIMD-only mode"); 10402 } 10403 10404 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 10405 const RegionCodeGenTy &MasterOpGen, 10406 SourceLocation Loc) { 10407 llvm_unreachable("Not supported in SIMD-only mode"); 10408 } 10409 10410 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 10411 SourceLocation Loc) { 10412 llvm_unreachable("Not supported in SIMD-only mode"); 10413 } 10414 10415 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 10416 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 10417 SourceLocation Loc) { 10418 llvm_unreachable("Not supported in SIMD-only mode"); 10419 } 10420 10421 void CGOpenMPSIMDRuntime::emitSingleRegion( 10422 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 10423 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 10424 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 10425 ArrayRef<const Expr *> AssignmentOps) { 10426 llvm_unreachable("Not supported in SIMD-only mode"); 10427 } 10428 10429 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 10430 const RegionCodeGenTy &OrderedOpGen, 10431 SourceLocation Loc, 10432 bool IsThreads) { 10433 llvm_unreachable("Not supported in SIMD-only mode"); 10434 } 10435 10436 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 10437 SourceLocation Loc, 10438 OpenMPDirectiveKind Kind, 10439 bool EmitChecks, 10440 bool ForceSimpleCall) { 10441 llvm_unreachable("Not supported in SIMD-only mode"); 10442 } 10443 10444 void CGOpenMPSIMDRuntime::emitForDispatchInit( 10445 CodeGenFunction &CGF, SourceLocation Loc, 10446 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 10447 bool Ordered, const DispatchRTInput &DispatchValues) { 10448 llvm_unreachable("Not supported in SIMD-only mode"); 10449 } 10450 10451 void CGOpenMPSIMDRuntime::emitForStaticInit( 10452 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 10453 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 10454 llvm_unreachable("Not supported in SIMD-only mode"); 10455 } 10456 10457 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 10458 CodeGenFunction &CGF, SourceLocation Loc, 10459 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 10460 llvm_unreachable("Not supported in SIMD-only mode"); 10461 } 10462 10463 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 10464 SourceLocation Loc, 10465 unsigned IVSize, 10466 bool IVSigned) { 10467 llvm_unreachable("Not supported in SIMD-only mode"); 10468 } 10469 10470 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 10471 SourceLocation Loc, 10472 OpenMPDirectiveKind DKind) { 10473 llvm_unreachable("Not supported in SIMD-only mode"); 10474 } 10475 10476 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 10477 SourceLocation Loc, 10478 unsigned IVSize, bool IVSigned, 10479 Address IL, Address LB, 10480 Address UB, Address ST) { 10481 llvm_unreachable("Not supported in SIMD-only mode"); 10482 } 10483 10484 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 10485 llvm::Value *NumThreads, 10486 SourceLocation Loc) { 10487 llvm_unreachable("Not supported in SIMD-only mode"); 10488 } 10489 10490 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 10491 OpenMPProcBindClauseKind ProcBind, 10492 SourceLocation Loc) { 10493 llvm_unreachable("Not supported in SIMD-only mode"); 10494 } 10495 10496 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 10497 const VarDecl *VD, 10498 Address VDAddr, 10499 SourceLocation Loc) { 10500 llvm_unreachable("Not supported in SIMD-only mode"); 10501 } 10502 10503 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 10504 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 10505 CodeGenFunction *CGF) { 10506 llvm_unreachable("Not supported in SIMD-only mode"); 10507 } 10508 10509 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 10510 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 10511 llvm_unreachable("Not supported in SIMD-only mode"); 10512 } 10513 10514 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 10515 ArrayRef<const Expr *> Vars, 10516 SourceLocation Loc) { 10517 llvm_unreachable("Not supported in SIMD-only mode"); 10518 } 10519 10520 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 10521 const OMPExecutableDirective &D, 10522 llvm::Function *TaskFunction, 10523 QualType SharedsTy, Address Shareds, 10524 const Expr *IfCond, 10525 const OMPTaskDataTy &Data) { 10526 llvm_unreachable("Not supported in SIMD-only mode"); 10527 } 10528 10529 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 10530 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 10531 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 10532 const Expr *IfCond, const OMPTaskDataTy &Data) { 10533 llvm_unreachable("Not supported in SIMD-only mode"); 10534 } 10535 10536 void CGOpenMPSIMDRuntime::emitReduction( 10537 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 10538 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 10539 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 10540 assert(Options.SimpleReduction && "Only simple reduction is expected."); 10541 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 10542 ReductionOps, Options); 10543 } 10544 10545 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 10546 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 10547 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 10548 llvm_unreachable("Not supported in SIMD-only mode"); 10549 } 10550 10551 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 10552 SourceLocation Loc, 10553 ReductionCodeGen &RCG, 10554 unsigned N) { 10555 llvm_unreachable("Not supported in SIMD-only mode"); 10556 } 10557 10558 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 10559 SourceLocation Loc, 10560 llvm::Value *ReductionsPtr, 10561 LValue SharedLVal) { 10562 llvm_unreachable("Not supported in SIMD-only mode"); 10563 } 10564 10565 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 10566 SourceLocation Loc) { 10567 llvm_unreachable("Not supported in SIMD-only mode"); 10568 } 10569 10570 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 10571 CodeGenFunction &CGF, SourceLocation Loc, 10572 OpenMPDirectiveKind CancelRegion) { 10573 llvm_unreachable("Not supported in SIMD-only mode"); 10574 } 10575 10576 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 10577 SourceLocation Loc, const Expr *IfCond, 10578 OpenMPDirectiveKind CancelRegion) { 10579 llvm_unreachable("Not supported in SIMD-only mode"); 10580 } 10581 10582 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 10583 const OMPExecutableDirective &D, StringRef ParentName, 10584 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 10585 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 10586 llvm_unreachable("Not supported in SIMD-only mode"); 10587 } 10588 10589 void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF, 10590 const OMPExecutableDirective &D, 10591 llvm::Function *OutlinedFn, 10592 llvm::Value *OutlinedFnID, 10593 const Expr *IfCond, 10594 const Expr *Device) { 10595 llvm_unreachable("Not supported in SIMD-only mode"); 10596 } 10597 10598 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 10599 llvm_unreachable("Not supported in SIMD-only mode"); 10600 } 10601 10602 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 10603 llvm_unreachable("Not supported in SIMD-only mode"); 10604 } 10605 10606 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 10607 return false; 10608 } 10609 10610 llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() { 10611 return nullptr; 10612 } 10613 10614 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 10615 const OMPExecutableDirective &D, 10616 SourceLocation Loc, 10617 llvm::Function *OutlinedFn, 10618 ArrayRef<llvm::Value *> CapturedVars) { 10619 llvm_unreachable("Not supported in SIMD-only mode"); 10620 } 10621 10622 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 10623 const Expr *NumTeams, 10624 const Expr *ThreadLimit, 10625 SourceLocation Loc) { 10626 llvm_unreachable("Not supported in SIMD-only mode"); 10627 } 10628 10629 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 10630 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10631 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 10632 llvm_unreachable("Not supported in SIMD-only mode"); 10633 } 10634 10635 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 10636 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10637 const Expr *Device) { 10638 llvm_unreachable("Not supported in SIMD-only mode"); 10639 } 10640 10641 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 10642 const OMPLoopDirective &D, 10643 ArrayRef<Expr *> NumIterations) { 10644 llvm_unreachable("Not supported in SIMD-only mode"); 10645 } 10646 10647 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 10648 const OMPDependClause *C) { 10649 llvm_unreachable("Not supported in SIMD-only mode"); 10650 } 10651 10652 const VarDecl * 10653 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 10654 const VarDecl *NativeParam) const { 10655 llvm_unreachable("Not supported in SIMD-only mode"); 10656 } 10657 10658 Address 10659 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 10660 const VarDecl *NativeParam, 10661 const VarDecl *TargetParam) const { 10662 llvm_unreachable("Not supported in SIMD-only mode"); 10663 } 10664