1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGCXXABI.h" 14 #include "CGCleanup.h" 15 #include "CGOpenMPRuntime.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "clang/CodeGen/ConstantInitBuilder.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/StmtOpenMP.h" 21 #include "clang/Basic/BitmaskEnum.h" 22 #include "llvm/ADT/ArrayRef.h" 23 #include "llvm/Bitcode/BitcodeReader.h" 24 #include "llvm/IR/DerivedTypes.h" 25 #include "llvm/IR/GlobalValue.h" 26 #include "llvm/IR/Value.h" 27 #include "llvm/Support/Format.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <cassert> 30 31 using namespace clang; 32 using namespace CodeGen; 33 34 namespace { 35 /// Base class for handling code generation inside OpenMP regions. 36 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 37 public: 38 /// Kinds of OpenMP regions used in codegen. 39 enum CGOpenMPRegionKind { 40 /// Region with outlined function for standalone 'parallel' 41 /// directive. 42 ParallelOutlinedRegion, 43 /// Region with outlined function for standalone 'task' directive. 44 TaskOutlinedRegion, 45 /// Region for constructs that do not require function outlining, 46 /// like 'for', 'sections', 'atomic' etc. directives. 47 InlinedRegion, 48 /// Region with outlined function for standalone 'target' directive. 
49 TargetRegion, 50 }; 51 52 CGOpenMPRegionInfo(const CapturedStmt &CS, 53 const CGOpenMPRegionKind RegionKind, 54 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 55 bool HasCancel) 56 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 57 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 58 59 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 60 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 61 bool HasCancel) 62 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 63 Kind(Kind), HasCancel(HasCancel) {} 64 65 /// Get a variable or parameter for storing global thread id 66 /// inside OpenMP construct. 67 virtual const VarDecl *getThreadIDVariable() const = 0; 68 69 /// Emit the captured statement body. 70 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 71 72 /// Get an LValue for the current ThreadID variable. 73 /// \return LValue for thread id variable. This LValue always has type int32*. 74 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 75 76 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 77 78 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 79 80 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 81 82 bool hasCancel() const { return HasCancel; } 83 84 static bool classof(const CGCapturedStmtInfo *Info) { 85 return Info->getKind() == CR_OpenMP; 86 } 87 88 ~CGOpenMPRegionInfo() override = default; 89 90 protected: 91 CGOpenMPRegionKind RegionKind; 92 RegionCodeGenTy CodeGen; 93 OpenMPDirectiveKind Kind; 94 bool HasCancel; 95 }; 96 97 /// API for captured statement code generation in OpenMP constructs. 
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 99 public: 100 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 101 const RegionCodeGenTy &CodeGen, 102 OpenMPDirectiveKind Kind, bool HasCancel, 103 StringRef HelperName) 104 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 105 HasCancel), 106 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 107 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 108 } 109 110 /// Get a variable or parameter for storing global thread id 111 /// inside OpenMP construct. 112 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 113 114 /// Get the name of the capture helper. 115 StringRef getHelperName() const override { return HelperName; } 116 117 static bool classof(const CGCapturedStmtInfo *Info) { 118 return CGOpenMPRegionInfo::classof(Info) && 119 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 120 ParallelOutlinedRegion; 121 } 122 123 private: 124 /// A variable or parameter storing global thread id for OpenMP 125 /// constructs. 126 const VarDecl *ThreadIDVar; 127 StringRef HelperName; 128 }; 129 130 /// API for captured statement code generation in OpenMP constructs. 131 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 132 public: 133 class UntiedTaskActionTy final : public PrePostActionTy { 134 bool Untied; 135 const VarDecl *PartIDVar; 136 const RegionCodeGenTy UntiedCodeGen; 137 llvm::SwitchInst *UntiedSwitch = nullptr; 138 139 public: 140 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 141 const RegionCodeGenTy &UntiedCodeGen) 142 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 143 void Enter(CodeGenFunction &CGF) override { 144 if (Untied) { 145 // Emit task switching point. 
146 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 147 CGF.GetAddrOfLocalVar(PartIDVar), 148 PartIDVar->getType()->castAs<PointerType>()); 149 llvm::Value *Res = 150 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 151 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 152 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 153 CGF.EmitBlock(DoneBB); 154 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 155 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 156 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 157 CGF.Builder.GetInsertBlock()); 158 emitUntiedSwitch(CGF); 159 } 160 } 161 void emitUntiedSwitch(CodeGenFunction &CGF) const { 162 if (Untied) { 163 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 164 CGF.GetAddrOfLocalVar(PartIDVar), 165 PartIDVar->getType()->castAs<PointerType>()); 166 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 167 PartIdLVal); 168 UntiedCodeGen(CGF); 169 CodeGenFunction::JumpDest CurPoint = 170 CGF.getJumpDestInCurrentScope(".untied.next."); 171 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 172 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 173 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 174 CGF.Builder.GetInsertBlock()); 175 CGF.EmitBranchThroughCleanup(CurPoint); 176 CGF.EmitBlock(CurPoint.getBlock()); 177 } 178 } 179 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } 180 }; 181 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 182 const VarDecl *ThreadIDVar, 183 const RegionCodeGenTy &CodeGen, 184 OpenMPDirectiveKind Kind, bool HasCancel, 185 const UntiedTaskActionTy &Action) 186 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 187 ThreadIDVar(ThreadIDVar), Action(Action) { 188 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 189 } 190 191 /// Get a variable or parameter for storing global thread id 192 /// inside OpenMP construct. 
193 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 194 195 /// Get an LValue for the current ThreadID variable. 196 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 197 198 /// Get the name of the capture helper. 199 StringRef getHelperName() const override { return ".omp_outlined."; } 200 201 void emitUntiedSwitch(CodeGenFunction &CGF) override { 202 Action.emitUntiedSwitch(CGF); 203 } 204 205 static bool classof(const CGCapturedStmtInfo *Info) { 206 return CGOpenMPRegionInfo::classof(Info) && 207 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 208 TaskOutlinedRegion; 209 } 210 211 private: 212 /// A variable or parameter storing global thread id for OpenMP 213 /// constructs. 214 const VarDecl *ThreadIDVar; 215 /// Action for emitting code for untied tasks. 216 const UntiedTaskActionTy &Action; 217 }; 218 219 /// API for inlined captured statement code generation in OpenMP 220 /// constructs. 221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 222 public: 223 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 224 const RegionCodeGenTy &CodeGen, 225 OpenMPDirectiveKind Kind, bool HasCancel) 226 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 227 OldCSI(OldCSI), 228 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 229 230 // Retrieve the value of the context parameter. 231 llvm::Value *getContextValue() const override { 232 if (OuterRegionInfo) 233 return OuterRegionInfo->getContextValue(); 234 llvm_unreachable("No context value for inlined OpenMP region"); 235 } 236 237 void setContextValue(llvm::Value *V) override { 238 if (OuterRegionInfo) { 239 OuterRegionInfo->setContextValue(V); 240 return; 241 } 242 llvm_unreachable("No context value for inlined OpenMP region"); 243 } 244 245 /// Lookup the captured field decl for a variable. 
246 const FieldDecl *lookup(const VarDecl *VD) const override { 247 if (OuterRegionInfo) 248 return OuterRegionInfo->lookup(VD); 249 // If there is no outer outlined region,no need to lookup in a list of 250 // captured variables, we can use the original one. 251 return nullptr; 252 } 253 254 FieldDecl *getThisFieldDecl() const override { 255 if (OuterRegionInfo) 256 return OuterRegionInfo->getThisFieldDecl(); 257 return nullptr; 258 } 259 260 /// Get a variable or parameter for storing global thread id 261 /// inside OpenMP construct. 262 const VarDecl *getThreadIDVariable() const override { 263 if (OuterRegionInfo) 264 return OuterRegionInfo->getThreadIDVariable(); 265 return nullptr; 266 } 267 268 /// Get an LValue for the current ThreadID variable. 269 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 270 if (OuterRegionInfo) 271 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 272 llvm_unreachable("No LValue for inlined OpenMP construct"); 273 } 274 275 /// Get the name of the capture helper. 276 StringRef getHelperName() const override { 277 if (auto *OuterRegionInfo = getOldCSI()) 278 return OuterRegionInfo->getHelperName(); 279 llvm_unreachable("No helper name for inlined OpenMP construct"); 280 } 281 282 void emitUntiedSwitch(CodeGenFunction &CGF) override { 283 if (OuterRegionInfo) 284 OuterRegionInfo->emitUntiedSwitch(CGF); 285 } 286 287 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 288 289 static bool classof(const CGCapturedStmtInfo *Info) { 290 return CGOpenMPRegionInfo::classof(Info) && 291 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 292 } 293 294 ~CGOpenMPInlinedRegionInfo() override = default; 295 296 private: 297 /// CodeGen info about outer OpenMP region. 298 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 299 CGOpenMPRegionInfo *OuterRegionInfo; 300 }; 301 302 /// API for captured statement code generation in OpenMP target 303 /// constructs. 
For this captures, implicit parameters are used instead of the 304 /// captured fields. The name of the target region has to be unique in a given 305 /// application so it is provided by the client, because only the client has 306 /// the information to generate that. 307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 308 public: 309 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 310 const RegionCodeGenTy &CodeGen, StringRef HelperName) 311 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 312 /*HasCancel=*/false), 313 HelperName(HelperName) {} 314 315 /// This is unused for target regions because each starts executing 316 /// with a single thread. 317 const VarDecl *getThreadIDVariable() const override { return nullptr; } 318 319 /// Get the name of the capture helper. 320 StringRef getHelperName() const override { return HelperName; } 321 322 static bool classof(const CGCapturedStmtInfo *Info) { 323 return CGOpenMPRegionInfo::classof(Info) && 324 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 325 } 326 327 private: 328 StringRef HelperName; 329 }; 330 331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 332 llvm_unreachable("No codegen for expressions"); 333 } 334 /// API for generation of expressions captured in a innermost OpenMP 335 /// region. 336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 337 public: 338 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 339 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 340 OMPD_unknown, 341 /*HasCancel=*/false), 342 PrivScope(CGF) { 343 // Make sure the globals captured in the provided statement are local by 344 // using the privatization logic. We assume the same variable is not 345 // captured more than once. 
346 for (const auto &C : CS.captures()) { 347 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 348 continue; 349 350 const VarDecl *VD = C.getCapturedVar(); 351 if (VD->isLocalVarDeclOrParm()) 352 continue; 353 354 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 355 /*RefersToEnclosingVariableOrCapture=*/false, 356 VD->getType().getNonReferenceType(), VK_LValue, 357 C.getLocation()); 358 PrivScope.addPrivate( 359 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); }); 360 } 361 (void)PrivScope.Privatize(); 362 } 363 364 /// Lookup the captured field decl for a variable. 365 const FieldDecl *lookup(const VarDecl *VD) const override { 366 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 367 return FD; 368 return nullptr; 369 } 370 371 /// Emit the captured statement body. 372 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 373 llvm_unreachable("No body for expressions"); 374 } 375 376 /// Get a variable or parameter for storing global thread id 377 /// inside OpenMP construct. 378 const VarDecl *getThreadIDVariable() const override { 379 llvm_unreachable("No thread id for expressions"); 380 } 381 382 /// Get the name of the capture helper. 383 StringRef getHelperName() const override { 384 llvm_unreachable("No helper name for expressions"); 385 } 386 387 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 388 389 private: 390 /// Private scope to capture global variables. 391 CodeGenFunction::OMPPrivateScope PrivScope; 392 }; 393 394 /// RAII for emitting code of OpenMP constructs. 395 class InlinedOpenMPRegionRAII { 396 CodeGenFunction &CGF; 397 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 398 FieldDecl *LambdaThisCaptureField = nullptr; 399 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 400 401 public: 402 /// Constructs region for combined constructs. 403 /// \param CodeGen Code generation sequence for combined directives. 
Includes 404 /// a list of functions used for code generation of implicitly inlined 405 /// regions. 406 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 407 OpenMPDirectiveKind Kind, bool HasCancel) 408 : CGF(CGF) { 409 // Start emission for the construct. 410 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 411 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 412 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 413 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 414 CGF.LambdaThisCaptureField = nullptr; 415 BlockInfo = CGF.BlockInfo; 416 CGF.BlockInfo = nullptr; 417 } 418 419 ~InlinedOpenMPRegionRAII() { 420 // Restore original CapturedStmtInfo only if we're done with code emission. 421 auto *OldCSI = 422 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 423 delete CGF.CapturedStmtInfo; 424 CGF.CapturedStmtInfo = OldCSI; 425 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 426 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 427 CGF.BlockInfo = BlockInfo; 428 } 429 }; 430 431 /// Values for bit flags used in the ident_t to describe the fields. 432 /// All enumeric elements are named and described in accordance with the code 433 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 434 enum OpenMPLocationFlags : unsigned { 435 /// Use trampoline for internal microtask. 436 OMP_IDENT_IMD = 0x01, 437 /// Use c-style ident structure. 438 OMP_IDENT_KMPC = 0x02, 439 /// Atomic reduction option for kmpc_reduce. 440 OMP_ATOMIC_REDUCE = 0x10, 441 /// Explicit 'barrier' directive. 442 OMP_IDENT_BARRIER_EXPL = 0x20, 443 /// Implicit barrier in code. 444 OMP_IDENT_BARRIER_IMPL = 0x40, 445 /// Implicit barrier in 'for' directive. 446 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 447 /// Implicit barrier in 'sections' directive. 448 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 449 /// Implicit barrier in 'single' directive. 
450 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 451 /// Call of __kmp_for_static_init for static loop. 452 OMP_IDENT_WORK_LOOP = 0x200, 453 /// Call of __kmp_for_static_init for sections. 454 OMP_IDENT_WORK_SECTIONS = 0x400, 455 /// Call of __kmp_for_static_init for distribute. 456 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 457 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 458 }; 459 460 /// Describes ident structure that describes a source location. 461 /// All descriptions are taken from 462 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 463 /// Original structure: 464 /// typedef struct ident { 465 /// kmp_int32 reserved_1; /**< might be used in Fortran; 466 /// see above */ 467 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 468 /// KMP_IDENT_KMPC identifies this union 469 /// member */ 470 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 471 /// see above */ 472 ///#if USE_ITT_BUILD 473 /// /* but currently used for storing 474 /// region-specific ITT */ 475 /// /* contextual information. */ 476 ///#endif /* USE_ITT_BUILD */ 477 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 478 /// C++ */ 479 /// char const *psource; /**< String describing the source location. 480 /// The string is composed of semi-colon separated 481 // fields which describe the source file, 482 /// the function and a pair of line numbers that 483 /// delimit the construct. 484 /// */ 485 /// } ident_t; 486 enum IdentFieldIndex { 487 /// might be used in Fortran 488 IdentField_Reserved_1, 489 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 490 IdentField_Flags, 491 /// Not really used in Fortran any more 492 IdentField_Reserved_2, 493 /// Source[4] in Fortran, do not use for C++ 494 IdentField_Reserved_3, 495 /// String describing the source location. 
The string is composed of 496 /// semi-colon separated fields which describe the source file, the function 497 /// and a pair of line numbers that delimit the construct. 498 IdentField_PSource 499 }; 500 501 /// Schedule types for 'omp for' loops (these enumerators are taken from 502 /// the enum sched_type in kmp.h). 503 enum OpenMPSchedType { 504 /// Lower bound for default (unordered) versions. 505 OMP_sch_lower = 32, 506 OMP_sch_static_chunked = 33, 507 OMP_sch_static = 34, 508 OMP_sch_dynamic_chunked = 35, 509 OMP_sch_guided_chunked = 36, 510 OMP_sch_runtime = 37, 511 OMP_sch_auto = 38, 512 /// static with chunk adjustment (e.g., simd) 513 OMP_sch_static_balanced_chunked = 45, 514 /// Lower bound for 'ordered' versions. 515 OMP_ord_lower = 64, 516 OMP_ord_static_chunked = 65, 517 OMP_ord_static = 66, 518 OMP_ord_dynamic_chunked = 67, 519 OMP_ord_guided_chunked = 68, 520 OMP_ord_runtime = 69, 521 OMP_ord_auto = 70, 522 OMP_sch_default = OMP_sch_static, 523 /// dist_schedule types 524 OMP_dist_sch_static_chunked = 91, 525 OMP_dist_sch_static = 92, 526 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 527 /// Set if the monotonic schedule modifier was present. 528 OMP_sch_modifier_monotonic = (1 << 29), 529 /// Set if the nonmonotonic schedule modifier was present. 
530 OMP_sch_modifier_nonmonotonic = (1 << 30), 531 }; 532 533 enum OpenMPRTLFunction { 534 /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, 535 /// kmpc_micro microtask, ...); 536 OMPRTL__kmpc_fork_call, 537 /// Call to void *__kmpc_threadprivate_cached(ident_t *loc, 538 /// kmp_int32 global_tid, void *data, size_t size, void ***cache); 539 OMPRTL__kmpc_threadprivate_cached, 540 /// Call to void __kmpc_threadprivate_register( ident_t *, 541 /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 542 OMPRTL__kmpc_threadprivate_register, 543 // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); 544 OMPRTL__kmpc_global_thread_num, 545 // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 546 // kmp_critical_name *crit); 547 OMPRTL__kmpc_critical, 548 // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 549 // global_tid, kmp_critical_name *crit, uintptr_t hint); 550 OMPRTL__kmpc_critical_with_hint, 551 // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 552 // kmp_critical_name *crit); 553 OMPRTL__kmpc_end_critical, 554 // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 555 // global_tid); 556 OMPRTL__kmpc_cancel_barrier, 557 // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 558 OMPRTL__kmpc_barrier, 559 // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 560 OMPRTL__kmpc_for_static_fini, 561 // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 562 // global_tid); 563 OMPRTL__kmpc_serialized_parallel, 564 // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 565 // global_tid); 566 OMPRTL__kmpc_end_serialized_parallel, 567 // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 568 // kmp_int32 num_threads); 569 OMPRTL__kmpc_push_num_threads, 570 // Call to void __kmpc_flush(ident_t *loc); 571 OMPRTL__kmpc_flush, 572 // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 
global_tid); 573 OMPRTL__kmpc_master, 574 // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); 575 OMPRTL__kmpc_end_master, 576 // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 577 // int end_part); 578 OMPRTL__kmpc_omp_taskyield, 579 // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); 580 OMPRTL__kmpc_single, 581 // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); 582 OMPRTL__kmpc_end_single, 583 // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 584 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 585 // kmp_routine_entry_t *task_entry); 586 OMPRTL__kmpc_omp_task_alloc, 587 // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * 588 // new_task); 589 OMPRTL__kmpc_omp_task, 590 // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 591 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 592 // kmp_int32 didit); 593 OMPRTL__kmpc_copyprivate, 594 // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 595 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 596 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 597 OMPRTL__kmpc_reduce, 598 // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 599 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 600 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 601 // *lck); 602 OMPRTL__kmpc_reduce_nowait, 603 // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 604 // kmp_critical_name *lck); 605 OMPRTL__kmpc_end_reduce, 606 // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 607 // kmp_critical_name *lck); 608 OMPRTL__kmpc_end_reduce_nowait, 609 // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 610 // kmp_task_t * new_task); 611 OMPRTL__kmpc_omp_task_begin_if0, 612 // Call to void 
__kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 613 // kmp_task_t * new_task); 614 OMPRTL__kmpc_omp_task_complete_if0, 615 // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 616 OMPRTL__kmpc_ordered, 617 // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 618 OMPRTL__kmpc_end_ordered, 619 // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 620 // global_tid); 621 OMPRTL__kmpc_omp_taskwait, 622 // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 623 OMPRTL__kmpc_taskgroup, 624 // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 625 OMPRTL__kmpc_end_taskgroup, 626 // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 627 // int proc_bind); 628 OMPRTL__kmpc_push_proc_bind, 629 // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 630 // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t 631 // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 632 OMPRTL__kmpc_omp_task_with_deps, 633 // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 634 // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 635 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 636 OMPRTL__kmpc_omp_wait_deps, 637 // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 638 // global_tid, kmp_int32 cncl_kind); 639 OMPRTL__kmpc_cancellationpoint, 640 // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 641 // kmp_int32 cncl_kind); 642 OMPRTL__kmpc_cancel, 643 // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 644 // kmp_int32 num_teams, kmp_int32 thread_limit); 645 OMPRTL__kmpc_push_num_teams, 646 // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 647 // microtask, ...); 648 OMPRTL__kmpc_fork_teams, 649 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 650 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, 
kmp_int64 st, int nogroup, int 651 // sched, kmp_uint64 grainsize, void *task_dup); 652 OMPRTL__kmpc_taskloop, 653 // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 654 // num_dims, struct kmp_dim *dims); 655 OMPRTL__kmpc_doacross_init, 656 // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 657 OMPRTL__kmpc_doacross_fini, 658 // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 659 // *vec); 660 OMPRTL__kmpc_doacross_post, 661 // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 662 // *vec); 663 OMPRTL__kmpc_doacross_wait, 664 // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void 665 // *data); 666 OMPRTL__kmpc_task_reduction_init, 667 // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 668 // *d); 669 OMPRTL__kmpc_task_reduction_get_th_data, 670 // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al); 671 OMPRTL__kmpc_alloc, 672 // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al); 673 OMPRTL__kmpc_free, 674 675 // 676 // Offloading related calls 677 // 678 // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 679 // size); 680 OMPRTL__kmpc_push_target_tripcount, 681 // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 682 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 683 // *arg_types); 684 OMPRTL__tgt_target, 685 // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 686 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 687 // *arg_types); 688 OMPRTL__tgt_target_nowait, 689 // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 690 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 691 // *arg_types, int32_t num_teams, int32_t thread_limit); 692 OMPRTL__tgt_target_teams, 693 // Call to int32_t __tgt_target_teams_nowait(int64_t 
device_id, void 694 // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t 695 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 696 OMPRTL__tgt_target_teams_nowait, 697 // Call to void __tgt_register_lib(__tgt_bin_desc *desc); 698 OMPRTL__tgt_register_lib, 699 // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); 700 OMPRTL__tgt_unregister_lib, 701 // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 702 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 703 OMPRTL__tgt_target_data_begin, 704 // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 705 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 706 // *arg_types); 707 OMPRTL__tgt_target_data_begin_nowait, 708 // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 709 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 710 OMPRTL__tgt_target_data_end, 711 // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t 712 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 713 // *arg_types); 714 OMPRTL__tgt_target_data_end_nowait, 715 // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 716 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 717 OMPRTL__tgt_target_data_update, 718 // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t 719 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 720 // *arg_types); 721 OMPRTL__tgt_target_data_update_nowait, 722 }; 723 724 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 725 /// region. 
/// Cleanup object that forwards to the Exit() hook of a pre/post action.
class CleanupTy final : public EHScopeStack::Cleanup {
  /// Action whose Exit() hook is called from Emit(). Held as a raw pointer;
  /// this class never deletes it.
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  /// Calls Action->Exit(CGF). Does nothing when \p CGF has no insert point.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

/// Invoke the stored region callback for \p CGF inside a fresh
/// RunCleanupsScope.
///
/// When a pre/post action has been attached, a CleanupTy wrapping it is pushed
/// on the EH stack (as a normal-and-EH cleanup) before the callback runs, and
/// the callback receives that action. Otherwise the callback receives a
/// default-constructed PrePostActionTy and no cleanup is pushed.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
///
/// The expected shape is a CallExpr whose callee is an OpaqueValueExpr whose
/// source expression (ignoring implicit casts) is a DeclRefExpr to an
/// OMPDeclareReductionDecl.
/// \return The referenced declaration, or nullptr if \p ReductionOp does not
/// have that shape.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

/// Emit initialization of a private reduction item at \p Private.
///
/// \param DRD User-defined reduction declaration; must be non-null (it is
/// dereferenced unconditionally).
/// \param InitOp Reduction operation expression. Only used when \p DRD has an
/// initializer, in which case it must be a CallExpr with an OpaqueValueExpr
/// callee and two arguments of the form `<unary-op> <DeclRefExpr>`.
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original (shared) item.
/// \param Ty Type of the item.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // The UDR has an initializer: emit InitOp with its opaque callee bound to
    // the second function of the UDR pair (the initializer; see
    // emitUserDefinedReduction) and its two argument variables remapped to
    // Private and Original respectively.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer clause: initialize Private from a private, constant
    // global that holds the null constant of type Ty.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Form an rvalue of the global according to Ty's evaluation kind.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    // Bind a temporary opaque expression to that rvalue and emit it into the
    // private storage.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
/// emitInitWithReductionInitializer; otherwise \p Init is emitted directly
/// into each element.
/// \param Init Initial expression of array.
/// \param DRD User-defined reduction declaration, or nullptr. The source
/// array is only walked when this is non-null.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is an emptiness check guarding a loop whose
  // exit condition is tested at the bottom of the body block.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  // EntryBB is remembered as the incoming block for the PHIs' initial values.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy. The per-element initialization gets its own cleanups scope.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): this source-pointer GEP reuses the IR value name
    // "omp.arraycpy.dest.element"; the name is part of the emitted IR text, so
    // it is left as is.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back-edge value is registered against the block that is current after
  // the element initialization was emitted, not necessarily BodyBB itself.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit the lvalue of shared reduction expression \p E; forwards to
/// CodeGenFunction::EmitOMPSharedLValue.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

/// Emit the upper-bound lvalue of \p E when it is an OpenMP array section
/// (emitted with IsLowerBound=false).
/// \return That lvalue, or a default-constructed LValue for any other
/// expression kind.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

/// Emit element-wise initialization of the array-typed private copy of
/// reduction item \p N.
///
/// The UDR path is chosen when \p DRD is non-null and either it has an
/// initializer or the private variable has no initializer of its own; in that
/// case the item's reduction operation is passed as the init expression.
/// Otherwise the private variable's own initializer is used.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress());
}

/// Record one clause-data entry per shared expression, pairing it with the
/// private expression and reduction operation at the same position.
///
/// \p Privates and \p ReductionOps are advanced in lock-step with \p Shareds
/// and are dereferenced without a bounds check, so they must provide at least
/// as many elements as \p Shareds.
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  auto IPriv = Privates.begin();
  auto IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IPriv, *IRed);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

/// Emit and append the (lower bound, upper bound) lvalue pair for reduction
/// item \p N. Items must be processed in order: \p N has to equal the number
/// of pairs already recorded.
void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
  SharedAddresses.emplace_back(First, Second);
}

/// Compute and record the size of reduction item \p N and, for variably
/// modified private types, emit the private type with its size expression
/// bound to the computed element count.
///
/// Appends a (size in chars, element count) pair to Sizes; the element count
/// is nullptr when the private type is not variably modified.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count is (upper - lower) + 1, byte size is
    // count * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
                                     SharedAddresses[N].first.getPointer());
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variable: byte size comes from the type, element count is
    // byte size / sizeof(element).
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression of the private type to the computed count
  // while the type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit the variably modified private type of reduction item \p N using an
/// externally supplied element count \p Size. Does nothing for private types
/// that are not variably modified (for which \p Size must be nullptr).
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit initialization of the private copy of reduction item \p N.
///
/// \param PrivateAddr Address of the private copy; it is re-cast to the memory
/// type of the private variable before use.
/// \param SharedLVal LValue of the shared item; it is rebuilt with the type,
/// base info and TBAA info derived from the recorded shared address.
/// \param DefaultInit Callback tried when no UDR initialization applies; a
/// true result suppresses emission of the private variable's own initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  // Three mutually exclusive strategies, tried in this order: arrays,
  // UDR-driven initialization, then the default/explicit initializer.
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// \return true if the type of the private copy of reduction item \p N has a
/// destruction kind other than DK_none.
bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

/// Push a destroy cleanup for the private copy of reduction item \p N at
/// \p PrivateAddr, if its type requires destruction.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  // NOTE(review): needCleanups(N) recomputes the same destruction kind that
  // DTorKind already holds.
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Starting from \p BaseLV of type \p BaseTy, repeatedly load through pointer
/// and reference levels until the type is neither or is the same as \p ElTy,
/// then return an lvalue for the reached address re-cast to the memory type
/// of \p ElTy (keeping the reached lvalue's type, base info and TBAA info).
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Wrap \p Addr in as many levels of indirection as \p BaseTy has pointer or
/// reference levels above \p ElTy.
///
/// One memory temporary is created per level and each outer temporary is made
/// to point at the next inner one; \p Addr (cast to the innermost temporary's
/// element type) is stored into the innermost temporary.
/// \return The outermost temporary, or, when no level had to be peeled,
/// \p Addr cast to \p BaseLVType with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // Most recently created temporary.
  Address TopTmp = Address::invalid();     // Temporary from the previous level.
  Address MostTopTmp = Address::invalid(); // First (outermost) temporary.
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

/// Find the variable underlying an array-section or array-subscript reduction
/// expression.
///
/// Nested array sections and then nested array subscripts are stripped from
/// the base until a DeclRefExpr is reached (anything else fails the cast<>).
/// \param Ref Reduction item expression.
/// \param [out] DE Set to the reached DeclRefExpr; left untouched when
/// \p Ref is neither an array section nor an array subscript.
/// \return The referenced VarDecl, or nullptr when \p Ref is neither kind.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

/// Produce the address through which the base variable of reduction item
/// \p N should be accessed so that it refers to the private copy, and record
/// that base variable in BaseDecls.
///
/// For array sections/subscripts, the private pointer is offset by the
/// distance between the start of the base variable's data and the shared
/// item's lower bound, then wrapped to match the base variable's type via
/// castToBase. For any other item \p PrivateAddr is returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  // Only written by getBaseDecl when it returns non-null, which is the only
  // case in which it is read below.
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress().getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress().getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

/// \return true if reduction item \p N uses a user-defined reduction that has
/// an initializer.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

/// Default thread-id lvalue: the thread-id variable is treated as a pointer
/// and the returned lvalue is obtained by loading through it.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

/// Emit the region body through the stored CodeGen callback, bracketed by a
/// terminate scope on the EH stack. Does nothing without an insert point.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

/// Task-region thread-id lvalue: unlike the base-class version, the thread-id
/// variable is used directly (no load through a pointer).
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Create an unnamed, public, non-mutable field of type \p FieldTy (no
/// bit-width, no in-class initializer), append it to \p DC and return it.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

/// Build the implicit "ident_t" record (four 32-bit signed integer fields
/// followed by a void pointer), cache its AST and LLVM types, create the
/// critical-name array type (8 x i32) and load offload-info metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}

/// Drop cached internal variables and erase emitted non-target global
/// variables that are still alive, are mere declarations and have no uses.
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

/// Join \p Parts into a single name: the first part is prefixed with
/// FirstSeparator and every following part with Separator.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return OS.str();
}

/// Emit the internal helper function for the combiner or the initializer of
/// a user-defined reduction.
///
/// \param Ty Reduction type; both parameters are restrict-qualified pointers
/// to it.
/// \param CombinerInitializer Expression emitted as the function body; may be
/// null, in which case no expression is emitted.
/// \param In Variable mapped to the pointee of the second parameter.
/// \param Out Variable mapped to the pointee of the first parameter.
/// \param IsCombiner Selects the function name ("omp_combiner" vs.
/// "omp_initializer"); when false, a non-trivial initializer of \p Out is
/// emitted into \p Out before \p CombinerInitializer.
/// \return The created function (internal linkage).
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  // (parameters are pushed in the order out, in below)
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, request that the helper always be inlined.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit the combiner and, if \p D has an initializer, the initializer
/// function for user-defined reduction \p D and cache the pair in UDRMap.
/// Does nothing if \p D is already cached.
///
/// The initializer expression itself is only passed on for call-style
/// initializers; for other kinds a null expression is passed.
/// \param CGF If non-null, \p D is additionally recorded against
/// CGF->CurFn in FunctionUDRMap (consumed by functionFinished).
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// \return The (combiner, initializer) function pair for \p D, emitting it
/// first (not tied to any function) if it is not cached yet. The initializer
/// is nullptr when \p D has no initializer.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

/// Shared implementation for outlining 'parallel' and 'teams' regions.
///
/// Determines whether \p D is one of the parallel-based directive kinds that
/// carry a cancel flag, sets up an outlined-region info for \p CS with
/// \p ThreadIDVar (which must have pointer type) and generates the captured
/// statement function in a fresh CodeGenFunction.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}

/// Outline the OMPD_parallel captured statement of \p D.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Outline the OMPD_teams captured statement of \p D.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Outline the task (or taskloop) captured statement of \p D.
///
/// \param ThreadIDVar Thread-id variable; must NOT have pointer type here.
/// \param PartIDVar Passed to the untied-task action.
/// \param TaskTVar Pointer-typed variable whose pointee is passed as the last
/// argument of the OMPRTL__kmpc_omp_task call emitted by the untied-task
/// callback.
/// \param Tied Passed to the untied-task action.
/// \param [out] NumberOfParts Written only when \p Tied is false, with the
/// action's part count after the body has been generated.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Callback handed to the untied-task action: emits a call to the
  // OMPRTL__kmpc_omp_task runtime entry with (location, thread id, task).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only plain 'task' directives can contribute a cancel flag here.
  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

/// Append the values in \p Data to \p Fields, one per field of \p RD in
/// declaration order, inserting null constants for any LLVM struct elements
/// that lie between consecutive fields (padding).
///
/// \p Data is read without a bounds check and must supply at least one value
/// per field of \p RD. Elements after the last field are not filled here.
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  // NOTE(review): CIBuilder is not referenced anywhere in this function;
  // looks removable - confirm its construction has no needed side effect.
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill the alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}

/// Create a global variable of record type \p Ty initialized from \p Data.
///
/// \param IsConstant Constness of the created global.
/// \param Data One constant per field of the record (see buildStructValue).
/// \param Name Name of the global.
/// \param Args Extra arguments forwarded to finishAndCreateGlobal after the
/// name, alignment and constness.
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

/// Build a constant struct of record type \p Ty from \p Data and add it as a
/// nested element to the aggregate builder \p Parent.
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

/// Return the address of the default ident_t global for \p Flags, creating it
/// on first use.
///
/// Globals are cached per (Flags, reserved_2 flags) key. A new global is
/// unnamed, has private linkage and unnamed_addr, zero reserved_1/reserved_3
/// fields, and a psource field pointing at the shared default string
/// ";unknown;unknown;0;0;;".
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Field order: reserved_1, flags, reserved_2, reserved_3, psource.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}

/// Create the per-function "service" insertion marker: a bitcast of an undef
/// i32 named "svcpt". It must not already exist for the current function.
///
/// \param AtCurrentPoint If true the marker is appended to the builder's
/// current block; otherwise it is inserted right after CGF.AllocaInsertPt.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

/// Erase the service insertion marker of the current function, if any, and
/// reset the cached pointer. Note that the map entry for the function is
/// created if it did not exist.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    // Clear the cached pointer before erasing the instruction it refers to.
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

/// Return a pointer to an ident_t describing \p Loc, for passing to runtime
/// calls.
///
/// Without debug info, or for an invalid \p Loc, this is the shared default
/// location global for \p Flags (with OMP_IDENT_KMPC added). Otherwise a
/// per-function ident_t temporary is used: it is created on first use and
/// filled from the default location at the service insert point, and on every
/// call its psource field is overwritten at the current insert point with a
/// ";file;function;line;column;;" string for \p Loc.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // getThreadID was called before this routine.
  // NOTE(review): Flags are only copied in when the temporary is first
  // created; later calls with different Flags reuse it unchanged - confirm
  // this is intended.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    // Emit the copy at the service insert point; the guard restores the
    // builder's previous insert point when this scope ends.
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Location strings are cached by the raw encoding of the source location.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    // The function component is left empty when the current declaration is
    // not a FunctionDecl (or is null).
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}

/// Return the OpenMP global thread id for use in the current function.
///
/// Resolution order: (1) a value already cached for the function; (2) a load
/// from the region's thread-id variable, when inside an OpenMP region that
/// has one and the exception-related guard below allows it; (3) a call to
/// the OMPRTL__kmpc_global_thread_num runtime entry emitted at the service
/// insert point, which is then cached.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  // The parameter is still used when no landing pad is required, or when the
  // builder is currently in the block holding the alloca insert point.
  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
      !CGF.getLangOpts().CXXExceptions ||
      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    if (auto *OMPRegionInfo =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
      if (OMPRegionInfo->getThreadIDVariable()) {
        // Check if this an outlined function with thread id passed as argument.
        LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call the
  // OMPRTL__kmpc_global_thread_num runtime entry, passing the location
  // (presumably kmp_int32 __kmpc_global_thread_num(ident_t *loc) - confirm
  // against the runtime).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

/// Release per-function state for CGF.CurFn: the service insert point and
/// cached location/thread-id entry, plus every user-defined reduction that
/// was recorded against this function (removed from UDRMap as well).
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for(auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
}

/// \return The LLVM type "pointer to ident_t".
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return IdentTy->getPointerTo();
}

/// \return Pointer to the microtask function type, which is built and cached
/// on first use.
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
  llvm::FunctionCallee RTLFn = nullptr;
  switch (static_cast<OpenMPRTLFunction>(Function)) {
  case OMPRTL__kmpc_fork_call: {
    // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
    // microtask, ...);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                getKmpc_MicroPointerTy()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
    if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
      if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
        llvm::LLVMContext &Ctx = F->getContext();
        llvm::MDBuilder MDB(Ctx);
        // Annotate the callback behavior of the __kmpc_fork_call:
        //  - The callback callee is argument number 2 (microtask).
        //  - The first two arguments of the callback callee are unknown (-1).
        //  - All variadic arguments to the __kmpc_fork_call are passed to the
        //    callback callee.
1692 F->addMetadata( 1693 llvm::LLVMContext::MD_callback, 1694 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1695 2, {-1, -1}, 1696 /* VarArgsArePassed */ true)})); 1697 } 1698 } 1699 break; 1700 } 1701 case OMPRTL__kmpc_global_thread_num: { 1702 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1703 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1704 auto *FnTy = 1705 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1706 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1707 break; 1708 } 1709 case OMPRTL__kmpc_threadprivate_cached: { 1710 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1711 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1712 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1713 CGM.VoidPtrTy, CGM.SizeTy, 1714 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1715 auto *FnTy = 1716 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1717 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1718 break; 1719 } 1720 case OMPRTL__kmpc_critical: { 1721 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1722 // kmp_critical_name *crit); 1723 llvm::Type *TypeParams[] = { 1724 getIdentTyPointerTy(), CGM.Int32Ty, 1725 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1726 auto *FnTy = 1727 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1728 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1729 break; 1730 } 1731 case OMPRTL__kmpc_critical_with_hint: { 1732 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1733 // kmp_critical_name *crit, uintptr_t hint); 1734 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1735 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1736 CGM.IntPtrTy}; 1737 auto *FnTy = 1738 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1739 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__kmpc_critical_with_hint"); 1740 break; 1741 } 1742 case OMPRTL__kmpc_threadprivate_register: { 1743 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1744 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1745 // typedef void *(*kmpc_ctor)(void *); 1746 auto *KmpcCtorTy = 1747 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1748 /*isVarArg*/ false)->getPointerTo(); 1749 // typedef void *(*kmpc_cctor)(void *, void *); 1750 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1751 auto *KmpcCopyCtorTy = 1752 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1753 /*isVarArg*/ false) 1754 ->getPointerTo(); 1755 // typedef void (*kmpc_dtor)(void *); 1756 auto *KmpcDtorTy = 1757 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1758 ->getPointerTo(); 1759 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1760 KmpcCopyCtorTy, KmpcDtorTy}; 1761 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1762 /*isVarArg*/ false); 1763 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1764 break; 1765 } 1766 case OMPRTL__kmpc_end_critical: { 1767 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1768 // kmp_critical_name *crit); 1769 llvm::Type *TypeParams[] = { 1770 getIdentTyPointerTy(), CGM.Int32Ty, 1771 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1772 auto *FnTy = 1773 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1774 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1775 break; 1776 } 1777 case OMPRTL__kmpc_cancel_barrier: { 1778 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1779 // global_tid); 1780 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1781 auto *FnTy = 1782 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1783 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1784 break; 1785 } 1786 case 
OMPRTL__kmpc_barrier: { 1787 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1788 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1789 auto *FnTy = 1790 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1791 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1792 break; 1793 } 1794 case OMPRTL__kmpc_for_static_fini: { 1795 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1796 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1797 auto *FnTy = 1798 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1799 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1800 break; 1801 } 1802 case OMPRTL__kmpc_push_num_threads: { 1803 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1804 // kmp_int32 num_threads) 1805 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1806 CGM.Int32Ty}; 1807 auto *FnTy = 1808 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1809 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1810 break; 1811 } 1812 case OMPRTL__kmpc_serialized_parallel: { 1813 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1814 // global_tid); 1815 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1816 auto *FnTy = 1817 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1818 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1819 break; 1820 } 1821 case OMPRTL__kmpc_end_serialized_parallel: { 1822 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1823 // global_tid); 1824 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1825 auto *FnTy = 1826 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1827 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1828 break; 1829 } 1830 case OMPRTL__kmpc_flush: { 1831 // Build void 
__kmpc_flush(ident_t *loc); 1832 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1833 auto *FnTy = 1834 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1835 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1836 break; 1837 } 1838 case OMPRTL__kmpc_master: { 1839 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1840 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1841 auto *FnTy = 1842 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1843 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1844 break; 1845 } 1846 case OMPRTL__kmpc_end_master: { 1847 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1848 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1849 auto *FnTy = 1850 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1851 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1852 break; 1853 } 1854 case OMPRTL__kmpc_omp_taskyield: { 1855 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1856 // int end_part); 1857 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1858 auto *FnTy = 1859 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1860 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1861 break; 1862 } 1863 case OMPRTL__kmpc_single: { 1864 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1865 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1866 auto *FnTy = 1867 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1868 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1869 break; 1870 } 1871 case OMPRTL__kmpc_end_single: { 1872 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1873 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1874 auto *FnTy = 1875 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1876 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1877 break; 1878 } 1879 case OMPRTL__kmpc_omp_task_alloc: { 1880 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1881 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1882 // kmp_routine_entry_t *task_entry); 1883 assert(KmpRoutineEntryPtrTy != nullptr && 1884 "Type kmp_routine_entry_t must be created."); 1885 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1886 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1887 // Return void * and then cast to particular kmp_task_t type. 1888 auto *FnTy = 1889 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1890 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1891 break; 1892 } 1893 case OMPRTL__kmpc_omp_task: { 1894 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1895 // *new_task); 1896 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1897 CGM.VoidPtrTy}; 1898 auto *FnTy = 1899 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1900 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 1901 break; 1902 } 1903 case OMPRTL__kmpc_copyprivate: { 1904 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 1905 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 1906 // kmp_int32 didit); 1907 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1908 auto *CpyFnTy = 1909 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 1910 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 1911 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 1912 CGM.Int32Ty}; 1913 auto *FnTy = 1914 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1915 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 1916 
break; 1917 } 1918 case OMPRTL__kmpc_reduce: { 1919 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 1920 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 1921 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 1922 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1923 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1924 /*isVarArg=*/false); 1925 llvm::Type *TypeParams[] = { 1926 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1927 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1928 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1929 auto *FnTy = 1930 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1931 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 1932 break; 1933 } 1934 case OMPRTL__kmpc_reduce_nowait: { 1935 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 1936 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 1937 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 1938 // *lck); 1939 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1940 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1941 /*isVarArg=*/false); 1942 llvm::Type *TypeParams[] = { 1943 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1944 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1945 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1946 auto *FnTy = 1947 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1948 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 1949 break; 1950 } 1951 case OMPRTL__kmpc_end_reduce: { 1952 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 1953 // kmp_critical_name *lck); 1954 llvm::Type *TypeParams[] = { 1955 getIdentTyPointerTy(), CGM.Int32Ty, 1956 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1957 auto *FnTy = 1958 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1959 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 1960 break; 1961 } 1962 case OMPRTL__kmpc_end_reduce_nowait: { 1963 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 1964 // kmp_critical_name *lck); 1965 llvm::Type *TypeParams[] = { 1966 getIdentTyPointerTy(), CGM.Int32Ty, 1967 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1968 auto *FnTy = 1969 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1970 RTLFn = 1971 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 1972 break; 1973 } 1974 case OMPRTL__kmpc_omp_task_begin_if0: { 1975 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1976 // *new_task); 1977 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1978 CGM.VoidPtrTy}; 1979 auto *FnTy = 1980 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1981 RTLFn = 1982 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 1983 break; 1984 } 1985 case OMPRTL__kmpc_omp_task_complete_if0: { 1986 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1987 // *new_task); 1988 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1989 CGM.VoidPtrTy}; 1990 auto *FnTy = 1991 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1992 RTLFn = CGM.CreateRuntimeFunction(FnTy, 1993 /*Name=*/"__kmpc_omp_task_complete_if0"); 1994 break; 1995 } 1996 case OMPRTL__kmpc_ordered: { 1997 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 1998 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1999 auto *FnTy = 2000 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2001 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 2002 break; 2003 } 2004 case OMPRTL__kmpc_end_ordered: { 2005 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 2006 llvm::Type *TypeParams[] 
= {getIdentTyPointerTy(), CGM.Int32Ty}; 2007 auto *FnTy = 2008 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2009 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 2010 break; 2011 } 2012 case OMPRTL__kmpc_omp_taskwait: { 2013 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 2014 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2015 auto *FnTy = 2016 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2017 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 2018 break; 2019 } 2020 case OMPRTL__kmpc_taskgroup: { 2021 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 2022 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2023 auto *FnTy = 2024 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2025 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 2026 break; 2027 } 2028 case OMPRTL__kmpc_end_taskgroup: { 2029 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 2030 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2031 auto *FnTy = 2032 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2033 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 2034 break; 2035 } 2036 case OMPRTL__kmpc_push_proc_bind: { 2037 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 2038 // int proc_bind) 2039 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2040 auto *FnTy = 2041 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2042 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 2043 break; 2044 } 2045 case OMPRTL__kmpc_omp_task_with_deps: { 2046 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2047 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2048 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 2049 
llvm::Type *TypeParams[] = { 2050 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 2051 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 2052 auto *FnTy = 2053 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2054 RTLFn = 2055 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 2056 break; 2057 } 2058 case OMPRTL__kmpc_omp_wait_deps: { 2059 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2060 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2061 // kmp_depend_info_t *noalias_dep_list); 2062 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2063 CGM.Int32Ty, CGM.VoidPtrTy, 2064 CGM.Int32Ty, CGM.VoidPtrTy}; 2065 auto *FnTy = 2066 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2067 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2068 break; 2069 } 2070 case OMPRTL__kmpc_cancellationpoint: { 2071 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2072 // global_tid, kmp_int32 cncl_kind) 2073 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2074 auto *FnTy = 2075 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2076 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2077 break; 2078 } 2079 case OMPRTL__kmpc_cancel: { 2080 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2081 // kmp_int32 cncl_kind) 2082 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2083 auto *FnTy = 2084 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2085 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2086 break; 2087 } 2088 case OMPRTL__kmpc_push_num_teams: { 2089 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 2090 // kmp_int32 num_teams, kmp_int32 num_threads) 2091 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2092 CGM.Int32Ty}; 2093 
auto *FnTy = 2094 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2095 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2096 break; 2097 } 2098 case OMPRTL__kmpc_fork_teams: { 2099 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2100 // microtask, ...); 2101 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2102 getKmpc_MicroPointerTy()}; 2103 auto *FnTy = 2104 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2105 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2106 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 2107 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 2108 llvm::LLVMContext &Ctx = F->getContext(); 2109 llvm::MDBuilder MDB(Ctx); 2110 // Annotate the callback behavior of the __kmpc_fork_teams: 2111 // - The callback callee is argument number 2 (microtask). 2112 // - The first two arguments of the callback callee are unknown (-1). 2113 // - All variadic arguments to the __kmpc_fork_teams are passed to the 2114 // callback callee. 
2115 F->addMetadata( 2116 llvm::LLVMContext::MD_callback, 2117 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 2118 2, {-1, -1}, 2119 /* VarArgsArePassed */ true)})); 2120 } 2121 } 2122 break; 2123 } 2124 case OMPRTL__kmpc_taskloop: { 2125 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 2126 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 2127 // sched, kmp_uint64 grainsize, void *task_dup); 2128 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2129 CGM.IntTy, 2130 CGM.VoidPtrTy, 2131 CGM.IntTy, 2132 CGM.Int64Ty->getPointerTo(), 2133 CGM.Int64Ty->getPointerTo(), 2134 CGM.Int64Ty, 2135 CGM.IntTy, 2136 CGM.IntTy, 2137 CGM.Int64Ty, 2138 CGM.VoidPtrTy}; 2139 auto *FnTy = 2140 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2141 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 2142 break; 2143 } 2144 case OMPRTL__kmpc_doacross_init: { 2145 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 2146 // num_dims, struct kmp_dim *dims); 2147 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2148 CGM.Int32Ty, 2149 CGM.Int32Ty, 2150 CGM.VoidPtrTy}; 2151 auto *FnTy = 2152 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2153 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2154 break; 2155 } 2156 case OMPRTL__kmpc_doacross_fini: { 2157 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2158 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2159 auto *FnTy = 2160 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2161 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2162 break; 2163 } 2164 case OMPRTL__kmpc_doacross_post: { 2165 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2166 // *vec); 2167 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2168 CGM.Int64Ty->getPointerTo()}; 2169 auto *FnTy = 
2170 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2171 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2172 break; 2173 } 2174 case OMPRTL__kmpc_doacross_wait: { 2175 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2176 // *vec); 2177 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2178 CGM.Int64Ty->getPointerTo()}; 2179 auto *FnTy = 2180 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2181 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2182 break; 2183 } 2184 case OMPRTL__kmpc_task_reduction_init: { 2185 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void 2186 // *data); 2187 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2188 auto *FnTy = 2189 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2190 RTLFn = 2191 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); 2192 break; 2193 } 2194 case OMPRTL__kmpc_task_reduction_get_th_data: { 2195 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2196 // *d); 2197 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2198 auto *FnTy = 2199 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2200 RTLFn = CGM.CreateRuntimeFunction( 2201 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2202 break; 2203 } 2204 case OMPRTL__kmpc_alloc: { 2205 // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t 2206 // al); omp_allocator_handle_t type is void *. 
2207 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy}; 2208 auto *FnTy = 2209 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2210 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc"); 2211 break; 2212 } 2213 case OMPRTL__kmpc_free: { 2214 // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t 2215 // al); omp_allocator_handle_t type is void *. 2216 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2217 auto *FnTy = 2218 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2219 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); 2220 break; 2221 } 2222 case OMPRTL__kmpc_push_target_tripcount: { 2223 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 2224 // size); 2225 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; 2226 llvm::FunctionType *FnTy = 2227 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2228 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); 2229 break; 2230 } 2231 case OMPRTL__tgt_target: { 2232 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2233 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2234 // *arg_types); 2235 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2236 CGM.VoidPtrTy, 2237 CGM.Int32Ty, 2238 CGM.VoidPtrPtrTy, 2239 CGM.VoidPtrPtrTy, 2240 CGM.SizeTy->getPointerTo(), 2241 CGM.Int64Ty->getPointerTo()}; 2242 auto *FnTy = 2243 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2244 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2245 break; 2246 } 2247 case OMPRTL__tgt_target_nowait: { 2248 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2249 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 2250 // int64_t *arg_types); 2251 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2252 CGM.VoidPtrTy, 2253 CGM.Int32Ty, 2254 CGM.VoidPtrPtrTy, 2255 
CGM.VoidPtrPtrTy, 2256 CGM.SizeTy->getPointerTo(), 2257 CGM.Int64Ty->getPointerTo()}; 2258 auto *FnTy = 2259 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2260 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2261 break; 2262 } 2263 case OMPRTL__tgt_target_teams: { 2264 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2265 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 2266 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2267 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2268 CGM.VoidPtrTy, 2269 CGM.Int32Ty, 2270 CGM.VoidPtrPtrTy, 2271 CGM.VoidPtrPtrTy, 2272 CGM.SizeTy->getPointerTo(), 2273 CGM.Int64Ty->getPointerTo(), 2274 CGM.Int32Ty, 2275 CGM.Int32Ty}; 2276 auto *FnTy = 2277 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2278 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2279 break; 2280 } 2281 case OMPRTL__tgt_target_teams_nowait: { 2282 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2283 // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t 2284 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2285 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2286 CGM.VoidPtrTy, 2287 CGM.Int32Ty, 2288 CGM.VoidPtrPtrTy, 2289 CGM.VoidPtrPtrTy, 2290 CGM.SizeTy->getPointerTo(), 2291 CGM.Int64Ty->getPointerTo(), 2292 CGM.Int32Ty, 2293 CGM.Int32Ty}; 2294 auto *FnTy = 2295 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2296 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2297 break; 2298 } 2299 case OMPRTL__tgt_register_lib: { 2300 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 2301 QualType ParamTy = 2302 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2303 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2304 auto *FnTy = 2305 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2306 
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 2307 break; 2308 } 2309 case OMPRTL__tgt_unregister_lib: { 2310 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 2311 QualType ParamTy = 2312 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2313 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2314 auto *FnTy = 2315 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2316 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 2317 break; 2318 } 2319 case OMPRTL__tgt_target_data_begin: { 2320 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2321 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2322 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2323 CGM.Int32Ty, 2324 CGM.VoidPtrPtrTy, 2325 CGM.VoidPtrPtrTy, 2326 CGM.SizeTy->getPointerTo(), 2327 CGM.Int64Ty->getPointerTo()}; 2328 auto *FnTy = 2329 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2330 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2331 break; 2332 } 2333 case OMPRTL__tgt_target_data_begin_nowait: { 2334 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2335 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2336 // *arg_types); 2337 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2338 CGM.Int32Ty, 2339 CGM.VoidPtrPtrTy, 2340 CGM.VoidPtrPtrTy, 2341 CGM.SizeTy->getPointerTo(), 2342 CGM.Int64Ty->getPointerTo()}; 2343 auto *FnTy = 2344 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2345 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2346 break; 2347 } 2348 case OMPRTL__tgt_target_data_end: { 2349 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2350 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2351 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2352 CGM.Int32Ty, 2353 CGM.VoidPtrPtrTy, 2354 CGM.VoidPtrPtrTy, 
2355 CGM.SizeTy->getPointerTo(), 2356 CGM.Int64Ty->getPointerTo()}; 2357 auto *FnTy = 2358 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2359 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2360 break; 2361 } 2362 case OMPRTL__tgt_target_data_end_nowait: { 2363 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2364 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2365 // *arg_types); 2366 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2367 CGM.Int32Ty, 2368 CGM.VoidPtrPtrTy, 2369 CGM.VoidPtrPtrTy, 2370 CGM.SizeTy->getPointerTo(), 2371 CGM.Int64Ty->getPointerTo()}; 2372 auto *FnTy = 2373 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2374 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2375 break; 2376 } 2377 case OMPRTL__tgt_target_data_update: { 2378 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2379 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2380 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2381 CGM.Int32Ty, 2382 CGM.VoidPtrPtrTy, 2383 CGM.VoidPtrPtrTy, 2384 CGM.SizeTy->getPointerTo(), 2385 CGM.Int64Ty->getPointerTo()}; 2386 auto *FnTy = 2387 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2388 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2389 break; 2390 } 2391 case OMPRTL__tgt_target_data_update_nowait: { 2392 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2393 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2394 // *arg_types); 2395 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2396 CGM.Int32Ty, 2397 CGM.VoidPtrPtrTy, 2398 CGM.VoidPtrPtrTy, 2399 CGM.SizeTy->getPointerTo(), 2400 CGM.Int64Ty->getPointerTo()}; 2401 auto *FnTy = 2402 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2403 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2404 break; 2405 } 
2406 } 2407 assert(RTLFn && "Unable to find OpenMP runtime function"); 2408 return RTLFn; 2409 } 2410 2411 llvm::FunctionCallee 2412 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 2413 assert((IVSize == 32 || IVSize == 64) && 2414 "IV size is not compatible with the omp runtime"); 2415 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2416 : "__kmpc_for_static_init_4u") 2417 : (IVSigned ? "__kmpc_for_static_init_8" 2418 : "__kmpc_for_static_init_8u"); 2419 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2420 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2421 llvm::Type *TypeParams[] = { 2422 getIdentTyPointerTy(), // loc 2423 CGM.Int32Ty, // tid 2424 CGM.Int32Ty, // schedtype 2425 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2426 PtrTy, // p_lower 2427 PtrTy, // p_upper 2428 PtrTy, // p_stride 2429 ITy, // incr 2430 ITy // chunk 2431 }; 2432 auto *FnTy = 2433 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2434 return CGM.CreateRuntimeFunction(FnTy, Name); 2435 } 2436 2437 llvm::FunctionCallee 2438 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 2439 assert((IVSize == 32 || IVSize == 64) && 2440 "IV size is not compatible with the omp runtime"); 2441 StringRef Name = 2442 IVSize == 32 2443 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2444 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2445 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2446 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2447 CGM.Int32Ty, // tid 2448 CGM.Int32Ty, // schedtype 2449 ITy, // lower 2450 ITy, // upper 2451 ITy, // stride 2452 ITy // chunk 2453 }; 2454 auto *FnTy = 2455 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2456 return CGM.CreateRuntimeFunction(FnTy, Name); 2457 } 2458 2459 llvm::FunctionCallee 2460 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 2461 assert((IVSize == 32 || IVSize == 64) && 2462 "IV size is not compatible with the omp runtime"); 2463 StringRef Name = 2464 IVSize == 32 2465 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2466 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2467 llvm::Type *TypeParams[] = { 2468 getIdentTyPointerTy(), // loc 2469 CGM.Int32Ty, // tid 2470 }; 2471 auto *FnTy = 2472 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2473 return CGM.CreateRuntimeFunction(FnTy, Name); 2474 } 2475 2476 llvm::FunctionCallee 2477 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 2478 assert((IVSize == 32 || IVSize == 64) && 2479 "IV size is not compatible with the omp runtime"); 2480 StringRef Name = 2481 IVSize == 32 2482 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2483 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2484 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2485 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2486 llvm::Type *TypeParams[] = { 2487 getIdentTyPointerTy(), // loc 2488 CGM.Int32Ty, // tid 2489 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2490 PtrTy, // p_lower 2491 PtrTy, // p_upper 2492 PtrTy // p_stride 2493 }; 2494 auto *FnTy = 2495 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2496 return CGM.CreateRuntimeFunction(FnTy, Name); 2497 } 2498 2499 Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) { 2500 if (CGM.getLangOpts().OpenMPSimd) 2501 return Address::invalid(); 2502 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2503 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2504 if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) { 2505 SmallString<64> PtrName; 2506 { 2507 llvm::raw_svector_ostream OS(PtrName); 2508 OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr"; 2509 } 2510 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2511 if (!Ptr) { 2512 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2513 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2514 PtrName); 2515 if (!CGM.getLangOpts().OpenMPIsDevice) { 2516 auto *GV = cast<llvm::GlobalVariable>(Ptr); 2517 GV->setLinkage(llvm::GlobalValue::ExternalLinkage); 2518 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 2519 } 2520 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr)); 2521 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 2522 } 2523 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 2524 } 2525 return Address::invalid(); 2526 } 2527 2528 llvm::Constant * 2529 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2530 assert(!CGM.getLangOpts().OpenMPUseTLS || 2531 !CGM.getContext().getTargetInfo().isTLSSupported()); 2532 // Lookup the entry, lazily creating it if necessary. 
2533 std::string Suffix = getName({"cache", ""}); 2534 return getOrCreateInternalVariable( 2535 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 2536 } 2537 2538 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2539 const VarDecl *VD, 2540 Address VDAddr, 2541 SourceLocation Loc) { 2542 if (CGM.getLangOpts().OpenMPUseTLS && 2543 CGM.getContext().getTargetInfo().isTLSSupported()) 2544 return VDAddr; 2545 2546 llvm::Type *VarTy = VDAddr.getElementType(); 2547 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2548 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2549 CGM.Int8PtrTy), 2550 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2551 getOrCreateThreadPrivateCache(VD)}; 2552 return Address(CGF.EmitRuntimeCall( 2553 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2554 VDAddr.getAlignment()); 2555 } 2556 2557 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2558 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2559 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2560 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2561 // library. 2562 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 2563 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2564 OMPLoc); 2565 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2566 // to register constructor/destructor for variable. 
2567 llvm::Value *Args[] = { 2568 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 2569 Ctor, CopyCtor, Dtor}; 2570 CGF.EmitRuntimeCall( 2571 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 2572 } 2573 2574 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 2575 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 2576 bool PerformInit, CodeGenFunction *CGF) { 2577 if (CGM.getLangOpts().OpenMPUseTLS && 2578 CGM.getContext().getTargetInfo().isTLSSupported()) 2579 return nullptr; 2580 2581 VD = VD->getDefinition(CGM.getContext()); 2582 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 2583 QualType ASTTy = VD->getType(); 2584 2585 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 2586 const Expr *Init = VD->getAnyInitializer(); 2587 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2588 // Generate function that re-emits the declaration's initializer into the 2589 // threadprivate copy of the variable VD 2590 CodeGenFunction CtorCGF(CGM); 2591 FunctionArgList Args; 2592 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2593 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2594 ImplicitParamDecl::Other); 2595 Args.push_back(&Dst); 2596 2597 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2598 CGM.getContext().VoidPtrTy, Args); 2599 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2600 std::string Name = getName({"__kmpc_global_ctor_", ""}); 2601 llvm::Function *Fn = 2602 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2603 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 2604 Args, Loc, Loc); 2605 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 2606 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2607 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2608 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 2609 Arg = CtorCGF.Builder.CreateElementBitCast( 2610 Arg, 
CtorCGF.ConvertTypeForMem(ASTTy)); 2611 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 2612 /*IsInitializer=*/true); 2613 ArgVal = CtorCGF.EmitLoadOfScalar( 2614 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2615 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2616 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 2617 CtorCGF.FinishFunction(); 2618 Ctor = Fn; 2619 } 2620 if (VD->getType().isDestructedType() != QualType::DK_none) { 2621 // Generate function that emits destructor call for the threadprivate copy 2622 // of the variable VD 2623 CodeGenFunction DtorCGF(CGM); 2624 FunctionArgList Args; 2625 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2626 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2627 ImplicitParamDecl::Other); 2628 Args.push_back(&Dst); 2629 2630 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2631 CGM.getContext().VoidTy, Args); 2632 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2633 std::string Name = getName({"__kmpc_global_dtor_", ""}); 2634 llvm::Function *Fn = 2635 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2636 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2637 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 2638 Loc, Loc); 2639 // Create a scope with an artificial location for the body of this function. 2640 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2641 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 2642 DtorCGF.GetAddrOfLocalVar(&Dst), 2643 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 2644 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 2645 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2646 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2647 DtorCGF.FinishFunction(); 2648 Dtor = Fn; 2649 } 2650 // Do not emit init function if it is not required. 
2651 if (!Ctor && !Dtor) 2652 return nullptr; 2653 2654 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2655 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 2656 /*isVarArg=*/false) 2657 ->getPointerTo(); 2658 // Copying constructor for the threadprivate variable. 2659 // Must be NULL - reserved by runtime, but currently it requires that this 2660 // parameter is always NULL. Otherwise it fires assertion. 2661 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 2662 if (Ctor == nullptr) { 2663 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 2664 /*isVarArg=*/false) 2665 ->getPointerTo(); 2666 Ctor = llvm::Constant::getNullValue(CtorTy); 2667 } 2668 if (Dtor == nullptr) { 2669 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 2670 /*isVarArg=*/false) 2671 ->getPointerTo(); 2672 Dtor = llvm::Constant::getNullValue(DtorTy); 2673 } 2674 if (!CGF) { 2675 auto *InitFunctionTy = 2676 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 2677 std::string Name = getName({"__omp_threadprivate_init_", ""}); 2678 llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction( 2679 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 2680 CodeGenFunction InitCGF(CGM); 2681 FunctionArgList ArgList; 2682 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 2683 CGM.getTypes().arrangeNullaryFunction(), ArgList, 2684 Loc, Loc); 2685 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2686 InitCGF.FinishFunction(); 2687 return InitFunction; 2688 } 2689 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2690 } 2691 return nullptr; 2692 } 2693 2694 /// Obtain information that uniquely identifies a target entry. This 2695 /// consists of the file and device IDs as well as line number associated with 2696 /// the relevant entry source location. 
2697 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 2698 unsigned &DeviceID, unsigned &FileID, 2699 unsigned &LineNum) { 2700 SourceManager &SM = C.getSourceManager(); 2701 2702 // The loc should be always valid and have a file ID (the user cannot use 2703 // #pragma directives in macros) 2704 2705 assert(Loc.isValid() && "Source location is expected to be always valid."); 2706 2707 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 2708 assert(PLoc.isValid() && "Source location is expected to be always valid."); 2709 2710 llvm::sys::fs::UniqueID ID; 2711 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 2712 SM.getDiagnostics().Report(diag::err_cannot_open_file) 2713 << PLoc.getFilename() << EC.message(); 2714 2715 DeviceID = ID.getDevice(); 2716 FileID = ID.getFile(); 2717 LineNum = PLoc.getLine(); 2718 } 2719 2720 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 2721 llvm::GlobalVariable *Addr, 2722 bool PerformInit) { 2723 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2724 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2725 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) 2726 return CGM.getLangOpts().OpenMPIsDevice; 2727 VD = VD->getDefinition(CGM.getContext()); 2728 if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 2729 return CGM.getLangOpts().OpenMPIsDevice; 2730 2731 QualType ASTTy = VD->getType(); 2732 2733 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 2734 // Produce the unique prefix to identify the new target regions. We use 2735 // the source location of the variable declaration which we know to not 2736 // conflict with any target region. 
2737 unsigned DeviceID; 2738 unsigned FileID; 2739 unsigned Line; 2740 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 2741 SmallString<128> Buffer, Out; 2742 { 2743 llvm::raw_svector_ostream OS(Buffer); 2744 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 2745 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 2746 } 2747 2748 const Expr *Init = VD->getAnyInitializer(); 2749 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2750 llvm::Constant *Ctor; 2751 llvm::Constant *ID; 2752 if (CGM.getLangOpts().OpenMPIsDevice) { 2753 // Generate function that re-emits the declaration's initializer into 2754 // the threadprivate copy of the variable VD 2755 CodeGenFunction CtorCGF(CGM); 2756 2757 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2758 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2759 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2760 FTy, Twine(Buffer, "_ctor"), FI, Loc); 2761 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 2762 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2763 FunctionArgList(), Loc, Loc); 2764 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 2765 CtorCGF.EmitAnyExprToMem(Init, 2766 Address(Addr, CGM.getContext().getDeclAlign(VD)), 2767 Init->getType().getQualifiers(), 2768 /*IsInitializer=*/true); 2769 CtorCGF.FinishFunction(); 2770 Ctor = Fn; 2771 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2772 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 2773 } else { 2774 Ctor = new llvm::GlobalVariable( 2775 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2776 llvm::GlobalValue::PrivateLinkage, 2777 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 2778 ID = Ctor; 2779 } 2780 2781 // Register the information for the entry associated with the constructor. 
2782 Out.clear(); 2783 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2784 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 2785 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 2786 } 2787 if (VD->getType().isDestructedType() != QualType::DK_none) { 2788 llvm::Constant *Dtor; 2789 llvm::Constant *ID; 2790 if (CGM.getLangOpts().OpenMPIsDevice) { 2791 // Generate function that emits destructor call for the threadprivate 2792 // copy of the variable VD 2793 CodeGenFunction DtorCGF(CGM); 2794 2795 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2796 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2797 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2798 FTy, Twine(Buffer, "_dtor"), FI, Loc); 2799 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2800 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2801 FunctionArgList(), Loc, Loc); 2802 // Create a scope with an artificial location for the body of this 2803 // function. 2804 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2805 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 2806 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2807 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2808 DtorCGF.FinishFunction(); 2809 Dtor = Fn; 2810 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2811 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 2812 } else { 2813 Dtor = new llvm::GlobalVariable( 2814 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2815 llvm::GlobalValue::PrivateLinkage, 2816 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 2817 ID = Dtor; 2818 } 2819 // Register the information for the entry associated with the destructor. 
2820 Out.clear(); 2821 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2822 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2823 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2824 } 2825 return CGM.getLangOpts().OpenMPIsDevice; 2826 } 2827 2828 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2829 QualType VarType, 2830 StringRef Name) { 2831 std::string Suffix = getName({"artificial", ""}); 2832 std::string CacheSuffix = getName({"cache", ""}); 2833 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2834 llvm::Value *GAddr = 2835 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2836 llvm::Value *Args[] = { 2837 emitUpdateLocation(CGF, SourceLocation()), 2838 getThreadID(CGF, SourceLocation()), 2839 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2840 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2841 /*IsSigned=*/false), 2842 getOrCreateInternalVariable( 2843 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2844 return Address( 2845 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2846 CGF.EmitRuntimeCall( 2847 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2848 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2849 CGM.getPointerAlign()); 2850 } 2851 2852 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 2853 const RegionCodeGenTy &ThenGen, 2854 const RegionCodeGenTy &ElseGen) { 2855 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2856 2857 // If the condition constant folds and can be elided, try to avoid emitting 2858 // the condition and the dead arm of the if/else. 2859 bool CondConstant; 2860 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2861 if (CondConstant) 2862 ThenGen(CGF); 2863 else 2864 ElseGen(CGF); 2865 return; 2866 } 2867 2868 // Otherwise, the condition did not fold, or we couldn't elide it. 
Just 2869 // emit the conditional branch. 2870 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2871 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2872 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2873 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2874 2875 // Emit the 'then' code. 2876 CGF.EmitBlock(ThenBlock); 2877 ThenGen(CGF); 2878 CGF.EmitBranch(ContBlock); 2879 // Emit the 'else' code if present. 2880 // There is no need to emit line number for unconditional branch. 2881 (void)ApplyDebugLocation::CreateEmpty(CGF); 2882 CGF.EmitBlock(ElseBlock); 2883 ElseGen(CGF); 2884 // There is no need to emit line number for unconditional branch. 2885 (void)ApplyDebugLocation::CreateEmpty(CGF); 2886 CGF.EmitBranch(ContBlock); 2887 // Emit the continuation block for code after the if. 2888 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2889 } 2890 2891 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2892 llvm::Function *OutlinedFn, 2893 ArrayRef<llvm::Value *> CapturedVars, 2894 const Expr *IfCond) { 2895 if (!CGF.HaveInsertPoint()) 2896 return; 2897 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2898 auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, 2899 PrePostActionTy &) { 2900 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2901 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2902 llvm::Value *Args[] = { 2903 RTLoc, 2904 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2905 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2906 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2907 RealArgs.append(std::begin(Args), std::end(Args)); 2908 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2909 2910 llvm::FunctionCallee RTLFn = 2911 RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); 2912 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2913 }; 2914 auto &&ElseGen = [OutlinedFn, 
CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, 2915 PrePostActionTy &) { 2916 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2917 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2918 // Build calls: 2919 // __kmpc_serialized_parallel(&Loc, GTid); 2920 llvm::Value *Args[] = {RTLoc, ThreadID}; 2921 CGF.EmitRuntimeCall( 2922 RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); 2923 2924 // OutlinedFn(>id, &zero, CapturedStruct); 2925 Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 2926 /*Name*/ ".zero.addr"); 2927 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 2928 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 2929 // ThreadId for serialized parallels is 0. 2930 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 2931 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 2932 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2933 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2934 2935 // __kmpc_end_serialized_parallel(&Loc, GTid); 2936 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2937 CGF.EmitRuntimeCall( 2938 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 2939 EndArgs); 2940 }; 2941 if (IfCond) { 2942 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 2943 } else { 2944 RegionCodeGenTy ThenRCG(ThenGen); 2945 ThenRCG(CGF); 2946 } 2947 } 2948 2949 // If we're inside an (outlined) parallel region, use the region info's 2950 // thread-ID variable (it is passed in a first argument of the outlined function 2951 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2952 // regular serial code region, get thread ID by calling kmp_int32 2953 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2954 // return the address of that temp. 
2955 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2956 SourceLocation Loc) { 2957 if (auto *OMPRegionInfo = 2958 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2959 if (OMPRegionInfo->getThreadIDVariable()) 2960 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 2961 2962 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2963 QualType Int32Ty = 2964 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2965 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2966 CGF.EmitStoreOfScalar(ThreadID, 2967 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2968 2969 return ThreadIDTemp; 2970 } 2971 2972 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 2973 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2974 SmallString<256> Buffer; 2975 llvm::raw_svector_ostream Out(Buffer); 2976 Out << Name; 2977 StringRef RuntimeName = Out.str(); 2978 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2979 if (Elem.second) { 2980 assert(Elem.second->getType()->getPointerElementType() == Ty && 2981 "OMP internal variable has different type than requested"); 2982 return &*Elem.second; 2983 } 2984 2985 return Elem.second = new llvm::GlobalVariable( 2986 CGM.getModule(), Ty, /*IsConstant*/ false, 2987 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2988 Elem.first(), /*InsertBefore=*/nullptr, 2989 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2990 } 2991 2992 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2993 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2994 std::string Name = getName({Prefix, "var"}); 2995 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2996 } 2997 2998 namespace { 2999 /// Common pre(post)-action for different OpenMP constructs. 
3000 class CommonActionTy final : public PrePostActionTy { 3001 llvm::FunctionCallee EnterCallee; 3002 ArrayRef<llvm::Value *> EnterArgs; 3003 llvm::FunctionCallee ExitCallee; 3004 ArrayRef<llvm::Value *> ExitArgs; 3005 bool Conditional; 3006 llvm::BasicBlock *ContBlock = nullptr; 3007 3008 public: 3009 CommonActionTy(llvm::FunctionCallee EnterCallee, 3010 ArrayRef<llvm::Value *> EnterArgs, 3011 llvm::FunctionCallee ExitCallee, 3012 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 3013 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 3014 ExitArgs(ExitArgs), Conditional(Conditional) {} 3015 void Enter(CodeGenFunction &CGF) override { 3016 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 3017 if (Conditional) { 3018 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 3019 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 3020 ContBlock = CGF.createBasicBlock("omp_if.end"); 3021 // Generate the branch (If-stmt) 3022 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 3023 CGF.EmitBlock(ThenBlock); 3024 } 3025 } 3026 void Done(CodeGenFunction &CGF) { 3027 // Emit the rest of blocks/branches 3028 CGF.EmitBranch(ContBlock); 3029 CGF.EmitBlock(ContBlock, true); 3030 } 3031 void Exit(CodeGenFunction &CGF) override { 3032 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 3033 } 3034 }; 3035 } // anonymous namespace 3036 3037 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 3038 StringRef CriticalName, 3039 const RegionCodeGenTy &CriticalOpGen, 3040 SourceLocation Loc, const Expr *Hint) { 3041 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 3042 // CriticalOpGen(); 3043 // __kmpc_end_critical(ident_t *, gtid, Lock); 3044 // Prepare arguments and build a call to __kmpc_critical 3045 if (!CGF.HaveInsertPoint()) 3046 return; 3047 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3048 getCriticalRegionLock(CriticalName)}; 3049 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 3050 std::end(Args)); 3051 if (Hint) { 3052 EnterArgs.push_back(CGF.Builder.CreateIntCast( 3053 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); 3054 } 3055 CommonActionTy Action( 3056 createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint 3057 : OMPRTL__kmpc_critical), 3058 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); 3059 CriticalOpGen.setAction(Action); 3060 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 3061 } 3062 3063 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 3064 const RegionCodeGenTy &MasterOpGen, 3065 SourceLocation Loc) { 3066 if (!CGF.HaveInsertPoint()) 3067 return; 3068 // if(__kmpc_master(ident_t *, gtid)) { 3069 // MasterOpGen(); 3070 // __kmpc_end_master(ident_t *, gtid); 3071 // } 3072 // Prepare arguments and build a call to __kmpc_master 3073 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3074 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, 3075 createRuntimeFunction(OMPRTL__kmpc_end_master), Args, 3076 /*Conditional=*/true); 3077 MasterOpGen.setAction(Action); 3078 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 3079 Action.Done(CGF); 3080 } 3081 3082 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 3083 SourceLocation Loc) { 3084 if (!CGF.HaveInsertPoint()) 3085 return; 3086 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 3087 llvm::Value *Args[] = { 3088 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3089 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 3090 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 3091 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3092 Region->emitUntiedSwitch(CGF); 3093 } 3094 3095 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 3096 const RegionCodeGenTy &TaskgroupOpGen, 3097 SourceLocation Loc) { 3098 if 
(!CGF.HaveInsertPoint()) 3099 return; 3100 // __kmpc_taskgroup(ident_t *, gtid); 3101 // TaskgroupOpGen(); 3102 // __kmpc_end_taskgroup(ident_t *, gtid); 3103 // Prepare arguments and build a call to __kmpc_taskgroup 3104 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3105 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, 3106 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 3107 Args); 3108 TaskgroupOpGen.setAction(Action); 3109 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 3110 } 3111 3112 /// Given an array of pointers to variables, project the address of a 3113 /// given variable. 3114 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 3115 unsigned Index, const VarDecl *Var) { 3116 // Pull out the pointer to the variable. 3117 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 3118 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 3119 3120 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 3121 Addr = CGF.Builder.CreateElementBitCast( 3122 Addr, CGF.ConvertTypeForMem(Var->getType())); 3123 return Addr; 3124 } 3125 3126 static llvm::Value *emitCopyprivateCopyFunction( 3127 CodeGenModule &CGM, llvm::Type *ArgsType, 3128 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 3129 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 3130 SourceLocation Loc) { 3131 ASTContext &C = CGM.getContext(); 3132 // void copy_func(void *LHSArg, void *RHSArg); 3133 FunctionArgList Args; 3134 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3135 ImplicitParamDecl::Other); 3136 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3137 ImplicitParamDecl::Other); 3138 Args.push_back(&LHSArg); 3139 Args.push_back(&RHSArg); 3140 const auto &CGFI = 3141 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3142 std::string Name = 3143 
CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 3144 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 3145 llvm::GlobalValue::InternalLinkage, Name, 3146 &CGM.getModule()); 3147 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 3148 Fn->setDoesNotRecurse(); 3149 CodeGenFunction CGF(CGM); 3150 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 3151 // Dest = (void*[n])(LHSArg); 3152 // Src = (void*[n])(RHSArg); 3153 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3154 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 3155 ArgsType), CGF.getPointerAlign()); 3156 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3157 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 3158 ArgsType), CGF.getPointerAlign()); 3159 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 3160 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 3161 // ... 3162 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 3163 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 3164 const auto *DestVar = 3165 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 3166 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 3167 3168 const auto *SrcVar = 3169 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 3170 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 3171 3172 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 3173 QualType Type = VD->getType(); 3174 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 3175 } 3176 CGF.FinishFunction(); 3177 return Fn; 3178 } 3179 3180 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 3181 const RegionCodeGenTy &SingleOpGen, 3182 SourceLocation Loc, 3183 ArrayRef<const Expr *> CopyprivateVars, 3184 ArrayRef<const Expr *> SrcExprs, 3185 ArrayRef<const Expr *> DstExprs, 3186 ArrayRef<const Expr *> AssignmentOps) { 3187 if (!CGF.HaveInsertPoint()) 3188 return; 3189 assert(CopyprivateVars.size() == 
SrcExprs.size() && 3190 CopyprivateVars.size() == DstExprs.size() && 3191 CopyprivateVars.size() == AssignmentOps.size()); 3192 ASTContext &C = CGM.getContext(); 3193 // int32 did_it = 0; 3194 // if(__kmpc_single(ident_t *, gtid)) { 3195 // SingleOpGen(); 3196 // __kmpc_end_single(ident_t *, gtid); 3197 // did_it = 1; 3198 // } 3199 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3200 // <copy_func>, did_it); 3201 3202 Address DidIt = Address::invalid(); 3203 if (!CopyprivateVars.empty()) { 3204 // int32 did_it = 0; 3205 QualType KmpInt32Ty = 3206 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 3207 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 3208 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 3209 } 3210 // Prepare arguments and build a call to __kmpc_single 3211 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3212 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, 3213 createRuntimeFunction(OMPRTL__kmpc_end_single), Args, 3214 /*Conditional=*/true); 3215 SingleOpGen.setAction(Action); 3216 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 3217 if (DidIt.isValid()) { 3218 // did_it = 1; 3219 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 3220 } 3221 Action.Done(CGF); 3222 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3223 // <copy_func>, did_it); 3224 if (DidIt.isValid()) { 3225 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 3226 QualType CopyprivateArrayTy = 3227 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 3228 /*IndexTypeQuals=*/0); 3229 // Create a list of all private variables for copyprivate. 
3230 Address CopyprivateList = 3231 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 3232 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 3233 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 3234 CGF.Builder.CreateStore( 3235 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3236 CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), 3237 Elem); 3238 } 3239 // Build function that copies private values from single region to all other 3240 // threads in the corresponding parallel region. 3241 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 3242 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 3243 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 3244 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 3245 Address CL = 3246 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 3247 CGF.VoidPtrTy); 3248 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 3249 llvm::Value *Args[] = { 3250 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 3251 getThreadID(CGF, Loc), // i32 <gtid> 3252 BufSize, // size_t <buf_size> 3253 CL.getPointer(), // void *<copyprivate list> 3254 CpyFn, // void (*) (void *, void *) <copy_func> 3255 DidItVal // i32 did_it 3256 }; 3257 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 3258 } 3259 } 3260 3261 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 3262 const RegionCodeGenTy &OrderedOpGen, 3263 SourceLocation Loc, bool IsThreads) { 3264 if (!CGF.HaveInsertPoint()) 3265 return; 3266 // __kmpc_ordered(ident_t *, gtid); 3267 // OrderedOpGen(); 3268 // __kmpc_end_ordered(ident_t *, gtid); 3269 // Prepare arguments and build a call to __kmpc_ordered 3270 if (IsThreads) { 3271 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3272 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, 3273 createRuntimeFunction(OMPRTL__kmpc_end_ordered), 3274 Args); 3275 
OrderedOpGen.setAction(Action); 3276 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3277 return; 3278 } 3279 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3280 } 3281 3282 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 3283 unsigned Flags; 3284 if (Kind == OMPD_for) 3285 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 3286 else if (Kind == OMPD_sections) 3287 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 3288 else if (Kind == OMPD_single) 3289 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 3290 else if (Kind == OMPD_barrier) 3291 Flags = OMP_IDENT_BARRIER_EXPL; 3292 else 3293 Flags = OMP_IDENT_BARRIER_IMPL; 3294 return Flags; 3295 } 3296 3297 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 3298 CodeGenFunction &CGF, const OMPLoopDirective &S, 3299 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 3300 // Check if the loop directive is actually a doacross loop directive. In this 3301 // case choose static, 1 schedule. 3302 if (llvm::any_of( 3303 S.getClausesOfKind<OMPOrderedClause>(), 3304 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 3305 ScheduleKind = OMPC_SCHEDULE_static; 3306 // Chunk size is 1 in this case. 
3307 llvm::APInt ChunkSize(32, 1); 3308 ChunkExpr = IntegerLiteral::Create( 3309 CGF.getContext(), ChunkSize, 3310 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 3311 SourceLocation()); 3312 } 3313 } 3314 3315 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 3316 OpenMPDirectiveKind Kind, bool EmitChecks, 3317 bool ForceSimpleCall) { 3318 if (!CGF.HaveInsertPoint()) 3319 return; 3320 // Build call __kmpc_cancel_barrier(loc, thread_id); 3321 // Build call __kmpc_barrier(loc, thread_id); 3322 unsigned Flags = getDefaultFlagsForBarriers(Kind); 3323 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 3324 // thread_id); 3325 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 3326 getThreadID(CGF, Loc)}; 3327 if (auto *OMPRegionInfo = 3328 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 3329 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 3330 llvm::Value *Result = CGF.EmitRuntimeCall( 3331 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 3332 if (EmitChecks) { 3333 // if (__kmpc_cancel_barrier()) { 3334 // exit from construct; 3335 // } 3336 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 3337 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 3338 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 3339 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 3340 CGF.EmitBlock(ExitBB); 3341 // exit from construct; 3342 CodeGenFunction::JumpDest CancelDestination = 3343 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 3344 CGF.EmitBranchThroughCleanup(CancelDestination); 3345 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 3346 } 3347 return; 3348 } 3349 } 3350 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 3351 } 3352 3353 /// Map the OpenMP loop schedule to the runtime enumeration. 
3354 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 3355 bool Chunked, bool Ordered) { 3356 switch (ScheduleKind) { 3357 case OMPC_SCHEDULE_static: 3358 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 3359 : (Ordered ? OMP_ord_static : OMP_sch_static); 3360 case OMPC_SCHEDULE_dynamic: 3361 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 3362 case OMPC_SCHEDULE_guided: 3363 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 3364 case OMPC_SCHEDULE_runtime: 3365 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 3366 case OMPC_SCHEDULE_auto: 3367 return Ordered ? OMP_ord_auto : OMP_sch_auto; 3368 case OMPC_SCHEDULE_unknown: 3369 assert(!Chunked && "chunk was specified but schedule kind not known"); 3370 return Ordered ? OMP_ord_static : OMP_sch_static; 3371 } 3372 llvm_unreachable("Unexpected runtime schedule"); 3373 } 3374 3375 /// Map the OpenMP distribute schedule to the runtime enumeration. 3376 static OpenMPSchedType 3377 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 3378 // only static is allowed for dist_schedule 3379 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 3380 } 3381 3382 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 3383 bool Chunked) const { 3384 OpenMPSchedType Schedule = 3385 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3386 return Schedule == OMP_sch_static; 3387 } 3388 3389 bool CGOpenMPRuntime::isStaticNonchunked( 3390 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3391 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3392 return Schedule == OMP_dist_sch_static; 3393 } 3394 3395 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 3396 bool Chunked) const { 3397 OpenMPSchedType Schedule = 3398 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3399 return Schedule == OMP_sch_static_chunked; 3400 } 3401 3402 bool CGOpenMPRuntime::isStaticChunked( 3403 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3404 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3405 return Schedule == OMP_dist_sch_static_chunked; 3406 } 3407 3408 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 3409 OpenMPSchedType Schedule = 3410 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 3411 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 3412 return Schedule != OMP_sch_static; 3413 } 3414 3415 static int addMonoNonMonoModifier(OpenMPSchedType Schedule, 3416 OpenMPScheduleClauseModifier M1, 3417 OpenMPScheduleClauseModifier M2) { 3418 int Modifier = 0; 3419 switch (M1) { 3420 case OMPC_SCHEDULE_MODIFIER_monotonic: 3421 Modifier = OMP_sch_modifier_monotonic; 3422 break; 3423 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3424 Modifier = OMP_sch_modifier_nonmonotonic; 3425 break; 3426 case OMPC_SCHEDULE_MODIFIER_simd: 3427 if (Schedule == OMP_sch_static_chunked) 3428 Schedule = OMP_sch_static_balanced_chunked; 3429 break; 3430 case OMPC_SCHEDULE_MODIFIER_last: 3431 
case OMPC_SCHEDULE_MODIFIER_unknown: 3432 break; 3433 } 3434 switch (M2) { 3435 case OMPC_SCHEDULE_MODIFIER_monotonic: 3436 Modifier = OMP_sch_modifier_monotonic; 3437 break; 3438 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3439 Modifier = OMP_sch_modifier_nonmonotonic; 3440 break; 3441 case OMPC_SCHEDULE_MODIFIER_simd: 3442 if (Schedule == OMP_sch_static_chunked) 3443 Schedule = OMP_sch_static_balanced_chunked; 3444 break; 3445 case OMPC_SCHEDULE_MODIFIER_last: 3446 case OMPC_SCHEDULE_MODIFIER_unknown: 3447 break; 3448 } 3449 return Schedule | Modifier; 3450 } 3451 3452 void CGOpenMPRuntime::emitForDispatchInit( 3453 CodeGenFunction &CGF, SourceLocation Loc, 3454 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 3455 bool Ordered, const DispatchRTInput &DispatchValues) { 3456 if (!CGF.HaveInsertPoint()) 3457 return; 3458 OpenMPSchedType Schedule = getRuntimeSchedule( 3459 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 3460 assert(Ordered || 3461 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 3462 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 3463 Schedule != OMP_sch_static_balanced_chunked)); 3464 // Call __kmpc_dispatch_init( 3465 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 3466 // kmp_int[32|64] lower, kmp_int[32|64] upper, 3467 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 3468 3469 // If the Chunk was not specified in the clause - use default value 1. 3470 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 3471 : CGF.Builder.getIntN(IVSize, 1); 3472 llvm::Value *Args[] = { 3473 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3474 CGF.Builder.getInt32(addMonoNonMonoModifier( 3475 Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 3476 DispatchValues.LB, // Lower 3477 DispatchValues.UB, // Upper 3478 CGF.Builder.getIntN(IVSize, 1), // Stride 3479 Chunk // Chunk 3480 }; 3481 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 3482 } 3483 3484 static void emitForStaticInitCall( 3485 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 3486 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 3487 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 3488 const CGOpenMPRuntime::StaticRTInput &Values) { 3489 if (!CGF.HaveInsertPoint()) 3490 return; 3491 3492 assert(!Values.Ordered); 3493 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 3494 Schedule == OMP_sch_static_balanced_chunked || 3495 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 3496 Schedule == OMP_dist_sch_static || 3497 Schedule == OMP_dist_sch_static_chunked); 3498 3499 // Call __kmpc_for_static_init( 3500 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 3501 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 3502 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 3503 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 3504 llvm::Value *Chunk = Values.Chunk; 3505 if (Chunk == nullptr) { 3506 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 3507 Schedule == OMP_dist_sch_static) && 3508 "expected static non-chunked schedule"); 3509 // If the Chunk was not specified in the clause - use default value 1. 
3510 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 3511 } else { 3512 assert((Schedule == OMP_sch_static_chunked || 3513 Schedule == OMP_sch_static_balanced_chunked || 3514 Schedule == OMP_ord_static_chunked || 3515 Schedule == OMP_dist_sch_static_chunked) && 3516 "expected static chunked schedule"); 3517 } 3518 llvm::Value *Args[] = { 3519 UpdateLocation, 3520 ThreadId, 3521 CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1, 3522 M2)), // Schedule type 3523 Values.IL.getPointer(), // &isLastIter 3524 Values.LB.getPointer(), // &LB 3525 Values.UB.getPointer(), // &UB 3526 Values.ST.getPointer(), // &Stride 3527 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 3528 Chunk // Chunk 3529 }; 3530 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 3531 } 3532 3533 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 3534 SourceLocation Loc, 3535 OpenMPDirectiveKind DKind, 3536 const OpenMPScheduleTy &ScheduleKind, 3537 const StaticRTInput &Values) { 3538 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 3539 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 3540 assert(isOpenMPWorksharingDirective(DKind) && 3541 "Expected loop-based or sections-based directive."); 3542 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 3543 isOpenMPLoopDirective(DKind) 3544 ? 
OMP_IDENT_WORK_LOOP 3545 : OMP_IDENT_WORK_SECTIONS); 3546 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3547 llvm::FunctionCallee StaticInitFunction = 3548 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3549 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3550 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 3551 } 3552 3553 void CGOpenMPRuntime::emitDistributeStaticInit( 3554 CodeGenFunction &CGF, SourceLocation Loc, 3555 OpenMPDistScheduleClauseKind SchedKind, 3556 const CGOpenMPRuntime::StaticRTInput &Values) { 3557 OpenMPSchedType ScheduleNum = 3558 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 3559 llvm::Value *UpdatedLocation = 3560 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 3561 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3562 llvm::FunctionCallee StaticInitFunction = 3563 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3564 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3565 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 3566 OMPC_SCHEDULE_MODIFIER_unknown, Values); 3567 } 3568 3569 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 3570 SourceLocation Loc, 3571 OpenMPDirectiveKind DKind) { 3572 if (!CGF.HaveInsertPoint()) 3573 return; 3574 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 3575 llvm::Value *Args[] = { 3576 emitUpdateLocation(CGF, Loc, 3577 isOpenMPDistributeDirective(DKind) 3578 ? OMP_IDENT_WORK_DISTRIBUTE 3579 : isOpenMPLoopDirective(DKind) 3580 ? 
OMP_IDENT_WORK_LOOP 3581 : OMP_IDENT_WORK_SECTIONS), 3582 getThreadID(CGF, Loc)}; 3583 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 3584 Args); 3585 } 3586 3587 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 3588 SourceLocation Loc, 3589 unsigned IVSize, 3590 bool IVSigned) { 3591 if (!CGF.HaveInsertPoint()) 3592 return; 3593 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 3594 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3595 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 3596 } 3597 3598 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 3599 SourceLocation Loc, unsigned IVSize, 3600 bool IVSigned, Address IL, 3601 Address LB, Address UB, 3602 Address ST) { 3603 // Call __kmpc_dispatch_next( 3604 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 3605 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 3606 // kmp_int[32|64] *p_stride); 3607 llvm::Value *Args[] = { 3608 emitUpdateLocation(CGF, Loc), 3609 getThreadID(CGF, Loc), 3610 IL.getPointer(), // &isLastIter 3611 LB.getPointer(), // &Lower 3612 UB.getPointer(), // &Upper 3613 ST.getPointer() // &Stride 3614 }; 3615 llvm::Value *Call = 3616 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 3617 return CGF.EmitScalarConversion( 3618 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 3619 CGF.getContext().BoolTy, Loc); 3620 } 3621 3622 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 3623 llvm::Value *NumThreads, 3624 SourceLocation Loc) { 3625 if (!CGF.HaveInsertPoint()) 3626 return; 3627 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 3628 llvm::Value *Args[] = { 3629 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3630 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 3631 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 3632 Args); 3633 
} 3634 3635 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 3636 OpenMPProcBindClauseKind ProcBind, 3637 SourceLocation Loc) { 3638 if (!CGF.HaveInsertPoint()) 3639 return; 3640 // Constants for proc bind value accepted by the runtime. 3641 enum ProcBindTy { 3642 ProcBindFalse = 0, 3643 ProcBindTrue, 3644 ProcBindMaster, 3645 ProcBindClose, 3646 ProcBindSpread, 3647 ProcBindIntel, 3648 ProcBindDefault 3649 } RuntimeProcBind; 3650 switch (ProcBind) { 3651 case OMPC_PROC_BIND_master: 3652 RuntimeProcBind = ProcBindMaster; 3653 break; 3654 case OMPC_PROC_BIND_close: 3655 RuntimeProcBind = ProcBindClose; 3656 break; 3657 case OMPC_PROC_BIND_spread: 3658 RuntimeProcBind = ProcBindSpread; 3659 break; 3660 case OMPC_PROC_BIND_unknown: 3661 llvm_unreachable("Unsupported proc_bind value."); 3662 } 3663 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 3664 llvm::Value *Args[] = { 3665 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3666 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 3667 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 3668 } 3669 3670 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 3671 SourceLocation Loc) { 3672 if (!CGF.HaveInsertPoint()) 3673 return; 3674 // Build call void __kmpc_flush(ident_t *loc) 3675 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 3676 emitUpdateLocation(CGF, Loc)); 3677 } 3678 3679 namespace { 3680 /// Indexes of fields for type kmp_task_t. 3681 enum KmpTaskTFields { 3682 /// List of shared variables. 3683 KmpTaskTShareds, 3684 /// Task routine. 3685 KmpTaskTRoutine, 3686 /// Partition id for the untied tasks. 3687 KmpTaskTPartId, 3688 /// Function with call of destructors for private variables. 3689 Data1, 3690 /// Task priority. 3691 Data2, 3692 /// (Taskloops only) Lower bound. 3693 KmpTaskTLowerBound, 3694 /// (Taskloops only) Upper bound. 
3695 KmpTaskTUpperBound, 3696 /// (Taskloops only) Stride. 3697 KmpTaskTStride, 3698 /// (Taskloops only) Is last iteration flag. 3699 KmpTaskTLastIter, 3700 /// (Taskloops only) Reduction data. 3701 KmpTaskTReductions, 3702 }; 3703 } // anonymous namespace 3704 3705 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 3706 return OffloadEntriesTargetRegion.empty() && 3707 OffloadEntriesDeviceGlobalVar.empty(); 3708 } 3709 3710 /// Initialize target region entry. 3711 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3712 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3713 StringRef ParentName, unsigned LineNum, 3714 unsigned Order) { 3715 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3716 "only required for the device " 3717 "code generation."); 3718 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3719 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3720 OMPTargetRegionEntryTargetRegion); 3721 ++OffloadingEntriesNum; 3722 } 3723 3724 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3725 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3726 StringRef ParentName, unsigned LineNum, 3727 llvm::Constant *Addr, llvm::Constant *ID, 3728 OMPTargetRegionEntryKind Flags) { 3729 // If we are emitting code for a target, the entry is already initialized, 3730 // only has to be registered. 
3731 if (CGM.getLangOpts().OpenMPIsDevice) { 3732 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { 3733 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3734 DiagnosticsEngine::Error, 3735 "Unable to find target region on line '%0' in the device code."); 3736 CGM.getDiags().Report(DiagID) << LineNum; 3737 return; 3738 } 3739 auto &Entry = 3740 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3741 assert(Entry.isValid() && "Entry not initialized!"); 3742 Entry.setAddress(Addr); 3743 Entry.setID(ID); 3744 Entry.setFlags(Flags); 3745 } else { 3746 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3747 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3748 ++OffloadingEntriesNum; 3749 } 3750 } 3751 3752 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3753 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3754 unsigned LineNum) const { 3755 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3756 if (PerDevice == OffloadEntriesTargetRegion.end()) 3757 return false; 3758 auto PerFile = PerDevice->second.find(FileID); 3759 if (PerFile == PerDevice->second.end()) 3760 return false; 3761 auto PerParentName = PerFile->second.find(ParentName); 3762 if (PerParentName == PerFile->second.end()) 3763 return false; 3764 auto PerLine = PerParentName->second.find(LineNum); 3765 if (PerLine == PerParentName->second.end()) 3766 return false; 3767 // Fail if this entry is already registered. 3768 if (PerLine->second.getAddress() || PerLine->second.getID()) 3769 return false; 3770 return true; 3771 } 3772 3773 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3774 const OffloadTargetRegionEntryInfoActTy &Action) { 3775 // Scan all target region entries and perform the provided action. 
3776 for (const auto &D : OffloadEntriesTargetRegion) 3777 for (const auto &F : D.second) 3778 for (const auto &P : F.second) 3779 for (const auto &L : P.second) 3780 Action(D.first, F.first, P.first(), L.first, L.second); 3781 } 3782 3783 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3784 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3785 OMPTargetGlobalVarEntryKind Flags, 3786 unsigned Order) { 3787 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3788 "only required for the device " 3789 "code generation."); 3790 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3791 ++OffloadingEntriesNum; 3792 } 3793 3794 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3795 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3796 CharUnits VarSize, 3797 OMPTargetGlobalVarEntryKind Flags, 3798 llvm::GlobalValue::LinkageTypes Linkage) { 3799 if (CGM.getLangOpts().OpenMPIsDevice) { 3800 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3801 assert(Entry.isValid() && Entry.getFlags() == Flags && 3802 "Entry not initialized!"); 3803 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3804 "Resetting with the new address."); 3805 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3806 if (Entry.getVarSize().isZero()) { 3807 Entry.setVarSize(VarSize); 3808 Entry.setLinkage(Linkage); 3809 } 3810 return; 3811 } 3812 Entry.setVarSize(VarSize); 3813 Entry.setLinkage(Linkage); 3814 Entry.setAddress(Addr); 3815 } else { 3816 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3817 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3818 assert(Entry.isValid() && Entry.getFlags() == Flags && 3819 "Entry not initialized!"); 3820 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3821 "Resetting with the new address."); 3822 if (Entry.getVarSize().isZero()) { 3823 Entry.setVarSize(VarSize); 3824 Entry.setLinkage(Linkage); 3825 } 3826 return; 3827 } 3828 
OffloadEntriesDeviceGlobalVar.try_emplace( 3829 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3830 ++OffloadingEntriesNum; 3831 } 3832 } 3833 3834 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3835 actOnDeviceGlobalVarEntriesInfo( 3836 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3837 // Scan all target region entries and perform the provided action. 3838 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3839 Action(E.getKey(), E.getValue()); 3840 } 3841 3842 llvm::Function * 3843 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { 3844 // If we don't have entries or if we are emitting code for the device, we 3845 // don't need to do anything. 3846 if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) 3847 return nullptr; 3848 3849 llvm::Module &M = CGM.getModule(); 3850 ASTContext &C = CGM.getContext(); 3851 3852 // Get list of devices we care about 3853 const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples; 3854 3855 // We should be creating an offloading descriptor only if there are devices 3856 // specified. 3857 assert(!Devices.empty() && "No OpenMP offloading devices??"); 3858 3859 // Create the external variables that will point to the begin and end of the 3860 // host entries section. These will be defined by the linker. 
3861 llvm::Type *OffloadEntryTy = 3862 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); 3863 std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"}); 3864 auto *HostEntriesBegin = new llvm::GlobalVariable( 3865 M, OffloadEntryTy, /*isConstant=*/true, 3866 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 3867 EntriesBeginName); 3868 std::string EntriesEndName = getName({"omp_offloading", "entries_end"}); 3869 auto *HostEntriesEnd = 3870 new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true, 3871 llvm::GlobalValue::ExternalLinkage, 3872 /*Initializer=*/nullptr, EntriesEndName); 3873 3874 // Create all device images 3875 auto *DeviceImageTy = cast<llvm::StructType>( 3876 CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); 3877 ConstantInitBuilder DeviceImagesBuilder(CGM); 3878 ConstantArrayBuilder DeviceImagesEntries = 3879 DeviceImagesBuilder.beginArray(DeviceImageTy); 3880 3881 for (const llvm::Triple &Device : Devices) { 3882 StringRef T = Device.getTriple(); 3883 std::string BeginName = getName({"omp_offloading", "img_start", ""}); 3884 auto *ImgBegin = new llvm::GlobalVariable( 3885 M, CGM.Int8Ty, /*isConstant=*/true, 3886 llvm::GlobalValue::ExternalWeakLinkage, 3887 /*Initializer=*/nullptr, Twine(BeginName).concat(T)); 3888 std::string EndName = getName({"omp_offloading", "img_end", ""}); 3889 auto *ImgEnd = new llvm::GlobalVariable( 3890 M, CGM.Int8Ty, /*isConstant=*/true, 3891 llvm::GlobalValue::ExternalWeakLinkage, 3892 /*Initializer=*/nullptr, Twine(EndName).concat(T)); 3893 3894 llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin, 3895 HostEntriesEnd}; 3896 createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data, 3897 DeviceImagesEntries); 3898 } 3899 3900 // Create device images global array. 
3901 std::string ImagesName = getName({"omp_offloading", "device_images"}); 3902 llvm::GlobalVariable *DeviceImages = 3903 DeviceImagesEntries.finishAndCreateGlobal(ImagesName, 3904 CGM.getPointerAlign(), 3905 /*isConstant=*/true); 3906 DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3907 3908 // This is a Zero array to be used in the creation of the constant expressions 3909 llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), 3910 llvm::Constant::getNullValue(CGM.Int32Ty)}; 3911 3912 // Create the target region descriptor. 3913 llvm::Constant *Data[] = { 3914 llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()), 3915 llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), 3916 DeviceImages, Index), 3917 HostEntriesBegin, HostEntriesEnd}; 3918 std::string Descriptor = getName({"omp_offloading", "descriptor"}); 3919 llvm::GlobalVariable *Desc = createGlobalStruct( 3920 CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor); 3921 3922 // Emit code to register or unregister the descriptor at execution 3923 // startup or closing, respectively. 3924 3925 llvm::Function *UnRegFn; 3926 { 3927 FunctionArgList Args; 3928 ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); 3929 Args.push_back(&DummyPtr); 3930 3931 CodeGenFunction CGF(CGM); 3932 // Disable debug info for global (de-)initializer because they are not part 3933 // of some particular construct. 
3934 CGF.disableDebugInfo(); 3935 const auto &FI = 3936 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3937 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 3938 std::string UnregName = getName({"omp_offloading", "descriptor_unreg"}); 3939 UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI); 3940 CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args); 3941 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), 3942 Desc); 3943 CGF.FinishFunction(); 3944 } 3945 llvm::Function *RegFn; 3946 { 3947 CodeGenFunction CGF(CGM); 3948 // Disable debug info for global (de-)initializer because they are not part 3949 // of some particular construct. 3950 CGF.disableDebugInfo(); 3951 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 3952 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 3953 3954 // Encode offload target triples into the registration function name. It 3955 // will serve as a comdat key for the registration/unregistration code for 3956 // this particular combination of offloading targets. 3957 SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U); 3958 RegFnNameParts[0] = "omp_offloading"; 3959 RegFnNameParts[1] = "descriptor_reg"; 3960 llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2), 3961 [](const llvm::Triple &T) -> const std::string& { 3962 return T.getTriple(); 3963 }); 3964 llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end()); 3965 std::string Descriptor = getName(RegFnNameParts); 3966 RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI); 3967 CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList()); 3968 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); 3969 // Create a variable to drive the registration and unregistration of the 3970 // descriptor, so we can reuse the logic that emits Ctors and Dtors. 
3971 ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), 3972 SourceLocation(), nullptr, C.CharTy, 3973 ImplicitParamDecl::Other); 3974 CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); 3975 CGF.FinishFunction(); 3976 } 3977 if (CGM.supportsCOMDAT()) { 3978 // It is sufficient to call registration function only once, so create a 3979 // COMDAT group for registration/unregistration functions and associated 3980 // data. That would reduce startup time and code size. Registration 3981 // function serves as a COMDAT group key. 3982 llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName()); 3983 RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); 3984 RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); 3985 RegFn->setComdat(ComdatKey); 3986 UnRegFn->setComdat(ComdatKey); 3987 DeviceImages->setComdat(ComdatKey); 3988 Desc->setComdat(ComdatKey); 3989 } 3990 return RegFn; 3991 } 3992 3993 void CGOpenMPRuntime::createOffloadEntry( 3994 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3995 llvm::GlobalValue::LinkageTypes Linkage) { 3996 StringRef Name = Addr->getName(); 3997 llvm::Module &M = CGM.getModule(); 3998 llvm::LLVMContext &C = M.getContext(); 3999 4000 // Create constant string with the name. 
4001 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 4002 4003 std::string StringName = getName({"omp_offloading", "entry_name"}); 4004 auto *Str = new llvm::GlobalVariable( 4005 M, StrPtrInit->getType(), /*isConstant=*/true, 4006 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 4007 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 4008 4009 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 4010 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 4011 llvm::ConstantInt::get(CGM.SizeTy, Size), 4012 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 4013 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 4014 std::string EntryName = getName({"omp_offloading", "entry", ""}); 4015 llvm::GlobalVariable *Entry = createGlobalStruct( 4016 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 4017 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 4018 4019 // The entry has to be created in the section the linker expects it to be. 4020 std::string Section = getName({"omp_offloading", "entries"}); 4021 Entry->setSection(Section); 4022 } 4023 4024 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 4025 // Emit the offloading entries and metadata so that the device codegen side 4026 // can easily figure out what to emit. The produced metadata looks like 4027 // this: 4028 // 4029 // !omp_offload.info = !{!1, ...} 4030 // 4031 // Right now we only generate metadata for function that contain target 4032 // regions. 4033 4034 // If we do not have entries, we don't need to do anything. 
if (OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Both vectors are indexed by an entry's creation order (getOrder()) and are
  // sized to the total number of registered entries. ParentFunctions is only
  // filled in for target region entries.
  SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        //              identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Walk the entries in creation order and emit an offload entry for each one
  // that is usable; unusable ones are either skipped or diagnosed.
  for (const auto *E : OrderedEntries) {
    assert(E && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                E)) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(DiagID);
        continue;
      }
      // Target regions are always registered with size 0 and weak linkage.
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE =
                   dyn_cast<OffloadEntriesInfoManagerTy::
                                OffloadEntryInfoDeviceGlobalVar>(E)) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        // The variable has no definition - no need to add the entry.
4150 if (CE->getVarSize().isZero()) 4151 continue; 4152 break; 4153 } 4154 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 4155 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 4156 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 4157 "Declaret target link address is set."); 4158 if (CGM.getLangOpts().OpenMPIsDevice) 4159 continue; 4160 if (!CE->getAddress()) { 4161 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4162 DiagnosticsEngine::Error, 4163 "Offloading entry for declare target variable is incorrect: the " 4164 "address is invalid."); 4165 CGM.getDiags().Report(DiagID); 4166 continue; 4167 } 4168 break; 4169 } 4170 createOffloadEntry(CE->getAddress(), CE->getAddress(), 4171 CE->getVarSize().getQuantity(), Flags, 4172 CE->getLinkage()); 4173 } else { 4174 llvm_unreachable("Unsupported entry kind."); 4175 } 4176 } 4177 } 4178 4179 /// Loads all the offload entries information from the host IR 4180 /// metadata. 4181 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 4182 // If we are in target mode, load the metadata from the host IR. This code has 4183 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
4184 4185 if (!CGM.getLangOpts().OpenMPIsDevice) 4186 return; 4187 4188 if (CGM.getLangOpts().OMPHostIRFile.empty()) 4189 return; 4190 4191 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 4192 if (auto EC = Buf.getError()) { 4193 CGM.getDiags().Report(diag::err_cannot_open_file) 4194 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4195 return; 4196 } 4197 4198 llvm::LLVMContext C; 4199 auto ME = expectedToErrorOrAndEmitErrors( 4200 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 4201 4202 if (auto EC = ME.getError()) { 4203 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4204 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 4205 CGM.getDiags().Report(DiagID) 4206 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4207 return; 4208 } 4209 4210 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 4211 if (!MD) 4212 return; 4213 4214 for (llvm::MDNode *MN : MD->operands()) { 4215 auto &&GetMDInt = [MN](unsigned Idx) { 4216 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 4217 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 4218 }; 4219 4220 auto &&GetMDString = [MN](unsigned Idx) { 4221 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 4222 return V->getString(); 4223 }; 4224 4225 switch (GetMDInt(0)) { 4226 default: 4227 llvm_unreachable("Unexpected metadata!"); 4228 break; 4229 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4230 OffloadingEntryInfoTargetRegion: 4231 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 4232 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 4233 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 4234 /*Order=*/GetMDInt(5)); 4235 break; 4236 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4237 OffloadingEntryInfoDeviceGlobalVar: 4238 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 4239 /*MangledName=*/GetMDString(1), 4240 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4241 /*Flags=*/GetMDInt(2)), 4242 /*Order=*/GetMDInt(3)); 4243 break; 4244 } 4245 } 4246 } 4247 4248 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 4249 if (!KmpRoutineEntryPtrTy) { 4250 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 4251 ASTContext &C = CGM.getContext(); 4252 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 4253 FunctionProtoType::ExtProtoInfo EPI; 4254 KmpRoutineEntryPtrQTy = C.getPointerType( 4255 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 4256 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 4257 } 4258 } 4259 4260 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 4261 // Make sure the type of the entry is already created. This is the type we 4262 // have to create: 4263 // struct __tgt_offload_entry{ 4264 // void *addr; // Pointer to the offload entry info. 4265 // // (function or global) 4266 // char *name; // Name of the function or global. 4267 // size_t size; // Size of the entry info (0 if it a function). 4268 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 4269 // int32_t reserved; // Reserved, to use by the runtime library. 
4270 // }; 4271 if (TgtOffloadEntryQTy.isNull()) { 4272 ASTContext &C = CGM.getContext(); 4273 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 4274 RD->startDefinition(); 4275 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4276 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 4277 addFieldToRecordDecl(C, RD, C.getSizeType()); 4278 addFieldToRecordDecl( 4279 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4280 addFieldToRecordDecl( 4281 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4282 RD->completeDefinition(); 4283 RD->addAttr(PackedAttr::CreateImplicit(C)); 4284 TgtOffloadEntryQTy = C.getRecordType(RD); 4285 } 4286 return TgtOffloadEntryQTy; 4287 } 4288 4289 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { 4290 // These are the types we need to build: 4291 // struct __tgt_device_image{ 4292 // void *ImageStart; // Pointer to the target code start. 4293 // void *ImageEnd; // Pointer to the target code end. 4294 // // We also add the host entries to the device image, as it may be useful 4295 // // for the target runtime to have access to that information. 4296 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all 4297 // // the entries. 4298 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4299 // // entries (non inclusive). 
4300 // }; 4301 if (TgtDeviceImageQTy.isNull()) { 4302 ASTContext &C = CGM.getContext(); 4303 RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image"); 4304 RD->startDefinition(); 4305 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4306 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4307 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4308 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4309 RD->completeDefinition(); 4310 TgtDeviceImageQTy = C.getRecordType(RD); 4311 } 4312 return TgtDeviceImageQTy; 4313 } 4314 4315 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { 4316 // struct __tgt_bin_desc{ 4317 // int32_t NumDevices; // Number of devices supported. 4318 // __tgt_device_image *DeviceImages; // Arrays of device images 4319 // // (one per device). 4320 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the 4321 // // entries. 4322 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4323 // // entries (non inclusive). 
4324 // }; 4325 if (TgtBinaryDescriptorQTy.isNull()) { 4326 ASTContext &C = CGM.getContext(); 4327 RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc"); 4328 RD->startDefinition(); 4329 addFieldToRecordDecl( 4330 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4331 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); 4332 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4333 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4334 RD->completeDefinition(); 4335 TgtBinaryDescriptorQTy = C.getRecordType(RD); 4336 } 4337 return TgtBinaryDescriptorQTy; 4338 } 4339 4340 namespace { 4341 struct PrivateHelpersTy { 4342 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 4343 const VarDecl *PrivateElemInit) 4344 : Original(Original), PrivateCopy(PrivateCopy), 4345 PrivateElemInit(PrivateElemInit) {} 4346 const VarDecl *Original; 4347 const VarDecl *PrivateCopy; 4348 const VarDecl *PrivateElemInit; 4349 }; 4350 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 4351 } // anonymous namespace 4352 4353 static RecordDecl * 4354 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 4355 if (!Privates.empty()) { 4356 ASTContext &C = CGM.getContext(); 4357 // Build struct .kmp_privates_t. 
{ 4358 // /* private vars */ 4359 // }; 4360 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 4361 RD->startDefinition(); 4362 for (const auto &Pair : Privates) { 4363 const VarDecl *VD = Pair.second.Original; 4364 QualType Type = VD->getType().getNonReferenceType(); 4365 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 4366 if (VD->hasAttrs()) { 4367 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 4368 E(VD->getAttrs().end()); 4369 I != E; ++I) 4370 FD->addAttr(*I); 4371 } 4372 } 4373 RD->completeDefinition(); 4374 return RD; 4375 } 4376 return nullptr; 4377 } 4378 4379 static RecordDecl * 4380 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 4381 QualType KmpInt32Ty, 4382 QualType KmpRoutineEntryPointerQTy) { 4383 ASTContext &C = CGM.getContext(); 4384 // Build struct kmp_task_t { 4385 // void * shareds; 4386 // kmp_routine_entry_t routine; 4387 // kmp_int32 part_id; 4388 // kmp_cmplrdata_t data1; 4389 // kmp_cmplrdata_t data2; 4390 // For taskloops additional fields: 4391 // kmp_uint64 lb; 4392 // kmp_uint64 ub; 4393 // kmp_int64 st; 4394 // kmp_int32 liter; 4395 // void * reductions; 4396 // }; 4397 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 4398 UD->startDefinition(); 4399 addFieldToRecordDecl(C, UD, KmpInt32Ty); 4400 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 4401 UD->completeDefinition(); 4402 QualType KmpCmplrdataTy = C.getRecordType(UD); 4403 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 4404 RD->startDefinition(); 4405 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4406 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 4407 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4408 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4409 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4410 if (isOpenMPTaskLoopDirective(Kind)) { 4411 QualType KmpUInt64Ty = 4412 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 4413 QualType KmpInt64Ty = 4414 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 4415 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4416 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4417 addFieldToRecordDecl(C, RD, KmpInt64Ty); 4418 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4419 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4420 } 4421 RD->completeDefinition(); 4422 return RD; 4423 } 4424 4425 static RecordDecl * 4426 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 4427 ArrayRef<PrivateDataTy> Privates) { 4428 ASTContext &C = CGM.getContext(); 4429 // Build struct kmp_task_t_with_privates { 4430 // kmp_task_t task_data; 4431 // .kmp_privates_t. privates; 4432 // }; 4433 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 4434 RD->startDefinition(); 4435 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 4436 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 4437 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 4438 RD->completeDefinition(); 4439 return RD; 4440 } 4441 4442 /// Emit a proxy function which accepts kmp_task_t as the second 4443 /// argument. 
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
///                task_privates_map, tt,
///                // For taskloops only:
///                tt->task_data.lb, tt->task_data.ub, tt->task_data.st,
///                tt->task_data.liter, tt->task_data.reductions,
///                tt->task_data.shareds);
///   return 0;
/// }
/// \endcode
/// The privates argument is a null pointer when the task record has no
/// privates field. \p TaskPrivatesMap is forwarded to \p TaskFunction as is.
/// \return The newly created internal-linkage proxy function.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Proxy signature: (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is *tt; Base is its first field (the kmp_task_t part).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  // part_id is passed by address, not by value.
  llvm::Value *PartidParam = PartIdLVal.getPointer();

  // shareds is loaded and cast to the pointer type the task function expects.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates record is the optional second field of the wrapper record;
  // pass its address as 'void *', or null when the field does not exist.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to tasks and taskloops; the last one is 'tt' itself as
  // 'void *'.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass the loaded lb, ub, st, liter and reductions
    // fields, in that order.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  // shareds is always the last argument.
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
Args.push_back(&GtidArg); 4570 Args.push_back(&TaskTypeArg); 4571 const auto &DestructorFnInfo = 4572 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4573 llvm::FunctionType *DestructorFnTy = 4574 CGM.getTypes().GetFunctionType(DestructorFnInfo); 4575 std::string Name = 4576 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 4577 auto *DestructorFn = 4578 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 4579 Name, &CGM.getModule()); 4580 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 4581 DestructorFnInfo); 4582 DestructorFn->setDoesNotRecurse(); 4583 CodeGenFunction CGF(CGM); 4584 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 4585 Args, Loc, Loc); 4586 4587 LValue Base = CGF.EmitLoadOfPointerLValue( 4588 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4589 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4590 const auto *KmpTaskTWithPrivatesQTyRD = 4591 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4592 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4593 Base = CGF.EmitLValueForField(Base, *FI); 4594 for (const auto *Field : 4595 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 4596 if (QualType::DestructionKind DtorKind = 4597 Field->getType().isDestructedType()) { 4598 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 4599 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 4600 } 4601 } 4602 CGF.FinishFunction(); 4603 return DestructorFn; 4604 } 4605 4606 /// Emit a privates mapping function for correct handling of private and 4607 /// firstprivate variables. 4608 /// \code 4609 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 4610 /// **noalias priv1,..., <tyn> **noalias privn) { 4611 /// *priv1 = &.privates.priv1; 4612 /// ...; 4613 /// *privn = &.privates.privn; 4614 /// } 4615 /// \endcode 4616 static llvm::Value * 4617 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 4618 ArrayRef<const Expr *> PrivateVars, 4619 ArrayRef<const Expr *> FirstprivateVars, 4620 ArrayRef<const Expr *> LastprivateVars, 4621 QualType PrivatesQTy, 4622 ArrayRef<PrivateDataTy> Privates) { 4623 ASTContext &C = CGM.getContext(); 4624 FunctionArgList Args; 4625 ImplicitParamDecl TaskPrivatesArg( 4626 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4627 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 4628 ImplicitParamDecl::Other); 4629 Args.push_back(&TaskPrivatesArg); 4630 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 4631 unsigned Counter = 1; 4632 for (const Expr *E : PrivateVars) { 4633 Args.push_back(ImplicitParamDecl::Create( 4634 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4635 C.getPointerType(C.getPointerType(E->getType())) 4636 .withConst() 4637 .withRestrict(), 4638 ImplicitParamDecl::Other)); 4639 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4640 PrivateVarsPos[VD] = Counter; 4641 ++Counter; 4642 } 4643 for (const Expr *E : FirstprivateVars) { 4644 Args.push_back(ImplicitParamDecl::Create( 4645 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4646 C.getPointerType(C.getPointerType(E->getType())) 4647 .withConst() 4648 .withRestrict(), 4649 ImplicitParamDecl::Other)); 4650 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4651 PrivateVarsPos[VD] = Counter; 4652 ++Counter; 4653 } 4654 for (const Expr *E : LastprivateVars) { 4655 Args.push_back(ImplicitParamDecl::Create( 4656 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4657 C.getPointerType(C.getPointerType(E->getType())) 4658 .withConst() 4659 .withRestrict(), 4660 ImplicitParamDecl::Other)); 4661 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4662 
PrivateVarsPos[VD] = Counter; 4663 ++Counter; 4664 } 4665 const auto &TaskPrivatesMapFnInfo = 4666 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4667 llvm::FunctionType *TaskPrivatesMapTy = 4668 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 4669 std::string Name = 4670 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 4671 auto *TaskPrivatesMap = llvm::Function::Create( 4672 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 4673 &CGM.getModule()); 4674 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 4675 TaskPrivatesMapFnInfo); 4676 if (CGM.getLangOpts().Optimize) { 4677 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 4678 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 4679 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 4680 } 4681 CodeGenFunction CGF(CGM); 4682 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 4683 TaskPrivatesMapFnInfo, Args, Loc, Loc); 4684 4685 // *privi = &.privates.privi; 4686 LValue Base = CGF.EmitLoadOfPointerLValue( 4687 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 4688 TaskPrivatesArg.getType()->castAs<PointerType>()); 4689 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 4690 Counter = 0; 4691 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 4692 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 4693 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4694 LValue RefLVal = 4695 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4696 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4697 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 4698 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 4699 ++Counter; 4700 } 4701 CGF.FinishFunction(); 4702 return TaskPrivatesMap; 4703 } 4704 4705 /// Emit initialization for private variables in task-based directives. 
///
/// Walks \p Privates and the fields of the privates record (the second field
/// of \p KmpTaskTWithPrivatesQTyRD) in lockstep. For each entry whose private
/// copy has an initializer, that initializer is emitted into the matching
/// field of \p TDBase. With \p ForDup set, only non-trivial constructor
/// initializers are emitted.
///
/// \param KmpTaskSharedsPtr Address of the shareds block; used as the source
///        for entries that carry a PrivateElemInit variable.
/// \param SharedsTy, SharedsPtrTy Record type of the shareds block and the
///        pointer type \p KmpTaskSharedsPtr is cast to.
/// \param Data Only Data.FirstprivateVars is consulted, to decide whether the
///        shareds block is needed as a source.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Second field of the wrapper record: the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Use the captured statement of the task region (taskloop or task).
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  // NOTE(review): SrcBase stays default-constructed when this condition is
  // false, yet the captured-field path below reads it; presumably
  // PrivateElemInit is only set for firstprivates, which makes the condition
  // true - confirm.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record itself.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Skip entries without an initializer; when duplicating, also skip
    // everything except non-trivial constructor calls.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          // Not captured: read the source straight from the local variable.
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          // Captured: read the source from the shareds block, re-wrapped with
          // the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: remap Elem to the source address and emit the
          // initializer of the private copy into the field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No element-init variable: the initializer does not depend on a
        // source object.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    // Advance to the next field even when this entry was skipped.
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
4811 static bool checkInitIsRequired(CodeGenFunction &CGF, 4812 ArrayRef<PrivateDataTy> Privates) { 4813 bool InitRequired = false; 4814 for (const PrivateDataTy &Pair : Privates) { 4815 const VarDecl *VD = Pair.second.PrivateCopy; 4816 const Expr *Init = VD->getAnyInitializer(); 4817 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4818 !CGF.isTrivialInitializer(Init)); 4819 if (InitRequired) 4820 break; 4821 } 4822 return InitRequired; 4823 } 4824 4825 4826 /// Emit task_dup function (for initialization of 4827 /// private/firstprivate/lastprivate vars and last_iter flag) 4828 /// \code 4829 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4830 /// lastpriv) { 4831 /// // setup lastprivate flag 4832 /// task_dst->last = lastpriv; 4833 /// // could be constructor calls here... 4834 /// } 4835 /// \endcode 4836 static llvm::Value * 4837 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4838 const OMPExecutableDirective &D, 4839 QualType KmpTaskTWithPrivatesPtrQTy, 4840 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4841 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4842 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4843 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4844 ASTContext &C = CGM.getContext(); 4845 FunctionArgList Args; 4846 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4847 KmpTaskTWithPrivatesPtrQTy, 4848 ImplicitParamDecl::Other); 4849 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4850 KmpTaskTWithPrivatesPtrQTy, 4851 ImplicitParamDecl::Other); 4852 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4853 ImplicitParamDecl::Other); 4854 Args.push_back(&DstArg); 4855 Args.push_back(&SrcArg); 4856 Args.push_back(&LastprivArg); 4857 const auto &TaskDupFnInfo = 4858 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4859 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 
4860 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4861 auto *TaskDup = llvm::Function::Create( 4862 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4863 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4864 TaskDup->setDoesNotRecurse(); 4865 CodeGenFunction CGF(CGM); 4866 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4867 Loc); 4868 4869 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4870 CGF.GetAddrOfLocalVar(&DstArg), 4871 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4872 // task_dst->liter = lastpriv; 4873 if (WithLastIter) { 4874 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4875 LValue Base = CGF.EmitLValueForField( 4876 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4877 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4878 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4879 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4880 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4881 } 4882 4883 // Emit initial values for private copies (if any). 4884 assert(!Privates.empty()); 4885 Address KmpTaskSharedsPtr = Address::invalid(); 4886 if (!Data.FirstprivateVars.empty()) { 4887 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4888 CGF.GetAddrOfLocalVar(&SrcArg), 4889 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4890 LValue Base = CGF.EmitLValueForField( 4891 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4892 KmpTaskSharedsPtr = Address( 4893 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4894 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4895 KmpTaskTShareds)), 4896 Loc), 4897 CGF.getNaturalTypeAlignment(SharedsTy)); 4898 } 4899 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4900 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4901 CGF.FinishFunction(); 4902 return TaskDup; 4903 } 4904 4905 /// Checks if destructor function is required to be generated. 
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
  bool NeedsCleanup = false;
  // The privates record is the second field of the task-with-privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
  // Stop at the first private field whose type is destructed.
  for (const FieldDecl *FD : PrivateRD->fields()) {
    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
    if (NeedsCleanup)
      break;
  }
  return NeedsCleanup;
}

/// Allocate and fill the task descriptor for a task-based directive.
/// Collects the private, firstprivate and lastprivate copies, builds the
/// task-with-privates record type, emits the privates mapping function and
/// the proxy task entry, calls __kmpc_omp_task_alloc, copies the shareds,
/// initializes the private copies and stores the destructors function and
/// the priority when they are needed.
/// \return The allocated task, its entry function and the lvalue/record of
/// the kmp_task_t part; TaskDupFn is set only for taskloops that need it.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Only firstprivates carry a helper variable for the element initializer.
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Largest alignment first; the stable sort keeps the clause order among
  // entries with equal alignment.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit the privates mapping function (if there are any privates); otherwise
  // pass a null pointer of the type of the task function's fourth parameter.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  // Flags known at compile time are collected in Flags; the 'final' flag may
  // depend on a runtime value and is merged in below.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // If Data.Final carries a value, select the flag at run time; otherwise use
  // its compile-time boolean.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  // View the returned task as a pointer to the task-with-privates record.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  // TDBase is the kmp_task_t part, i.e. the first field of the record.
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task_dup function is emitted only for taskloops that have
    // lastprivates or privates with non-trivial constructor initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    // The union declaration taken from the Data1 field is reused to address
    // the Data2 field.
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

/// Emit code for a task directive: initialize the task, build the dependence
/// array (if any) and either enqueue the task through the runtime or, when
/// the 'if' clause evaluates to false, wait for the dependences and run the
/// task entry directly.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  ASTContext &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
    // Field order of the kmp_depend_info record built below.
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    // The flags field is an unsigned integer as wide as 'bool'.
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Build kmp_depend_info once and reuse it afterwards.
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned I = 0; I < NumDependencies; ++I) {
      const Expr *E = Data.Dependences[I].second;
      LValue Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (const auto *ASE =
              dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // For an array section the length is the byte distance from the
        // section's first element to one past its last element.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = CGF.getTypeSize(Ty);
      }
      LValue Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[I].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_mutexinoutset:
        DepKind = DepMutexInOutSet;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      LValue FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // Pass the array to the runtime as a void pointer to its first element.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Filled and used only when there are dependences.
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'Then' path: hand the task over to the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
                        &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    // For untied tasks store 0 into the part_id field first.
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // Filled and used only when there are dependences.
  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'Else' path: wait for the dependences, then call the task entry directly
  // between the begin_if0/complete_if0 runtime calls.
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  // Without an 'if' clause only the 'then' path is emitted.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

/// Emit code for a taskloop directive: initialize the task, store the loop
/// bounds, stride and reductions pointer into the task descriptor and call
/// __kmpc_taskloop.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // if_val is the evaluated 'if' condition, or constant 1 without the clause.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Store the initializers of the lower bound, upper bound and stride
  // variables into the matching fields of the task descriptor.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // Values of the 'sched' argument: Data.Schedule's value being present
  // selects Grainsize or NumTasks (by its boolean), otherwise NoSchedule.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
5408 static void EmitOMPAggregateReduction( 5409 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5410 const VarDecl *RHSVar, 5411 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5412 const Expr *, const Expr *)> &RedOpGen, 5413 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5414 const Expr *UpExpr = nullptr) { 5415 // Perform element-by-element initialization. 5416 QualType ElementTy; 5417 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5418 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5419 5420 // Drill down to the base element type on both arrays. 5421 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5422 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5423 5424 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5425 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5426 // Cast from pointer to array type to pointer to single element. 5427 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5428 // The basic structure here is a while-do loop. 5429 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5430 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5431 llvm::Value *IsEmpty = 5432 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5433 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5434 5435 // Enter the loop body, making that address the current address. 
5436 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5437 CGF.EmitBlock(BodyBB); 5438 5439 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5440 5441 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5442 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5443 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5444 Address RHSElementCurrent = 5445 Address(RHSElementPHI, 5446 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5447 5448 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5449 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5450 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5451 Address LHSElementCurrent = 5452 Address(LHSElementPHI, 5453 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5454 5455 // Emit copy. 5456 CodeGenFunction::OMPPrivateScope Scope(CGF); 5457 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5458 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5459 Scope.Privatize(); 5460 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5461 Scope.ForceCleanup(); 5462 5463 // Shift the address forward by one element. 5464 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5465 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5466 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5467 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5468 // Check whether we've reached the end. 5469 llvm::Value *Done = 5470 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5471 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5472 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5473 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5474 5475 // Done. 5476 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5477 } 5478 5479 /// Emit reduction combiner. 
/// If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // Recognize a call whose callee is an opaque value wrapping a reference to
  // an 'omp declare reduction' declaration.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          // Map the opaque callee to the first function of the pair returned
          // for the declaration, then emit the call.
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  // Any other expression is emitted as is.
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emit the internal function 'void reduction_func(void *LHSArg, void
/// *RHSArg)' that applies every reduction operation in \p ReductionOps.
/// \param ArgsType LLVM type both arguments are cast to before their elements
/// are addressed.
/// \param Privates Private copies; their types decide whether an item is
/// reduced element-by-element.
/// \param LHSExprs References to the left-hand side helper variables.
/// \param RHSExprs References to the right-hand side helper variables.
/// \return The emitted function.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  // Idx is the slot in the argument arrays; it runs ahead of I because a
  // variably modified item uses one extra slot.
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The size is read from the next slot of the LHS array, where it is
      // held as a pointer-typed value and converted back to an integer.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // With the helper variables remapped, emit the reduction operations.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
5599 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5600 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5601 EmitOMPAggregateReduction( 5602 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5603 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5604 emitReductionCombiner(CGF, ReductionOp); 5605 }); 5606 } else { 5607 // Emit reduction for array subscript or single variable. 5608 emitReductionCombiner(CGF, ReductionOp); 5609 } 5610 } 5611 5612 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5613 ArrayRef<const Expr *> Privates, 5614 ArrayRef<const Expr *> LHSExprs, 5615 ArrayRef<const Expr *> RHSExprs, 5616 ArrayRef<const Expr *> ReductionOps, 5617 ReductionOptionsTy Options) { 5618 if (!CGF.HaveInsertPoint()) 5619 return; 5620 5621 bool WithNowait = Options.WithNowait; 5622 bool SimpleReduction = Options.SimpleReduction; 5623 5624 // Next code should be emitted for reduction: 5625 // 5626 // static kmp_critical_name lock = { 0 }; 5627 // 5628 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5629 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5630 // ... 5631 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5632 // *(Type<n>-1*)rhs[<n>-1]); 5633 // } 5634 // 5635 // ... 5636 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5637 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5638 // RedList, reduce_func, &<lock>)) { 5639 // case 1: 5640 // ... 5641 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5642 // ... 5643 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5644 // break; 5645 // case 2: 5646 // ... 5647 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5648 // ... 5649 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5650 // break; 5651 // default:; 5652 // } 5653 // 5654 // if SimpleReduction is true, only the next code is generated: 5655 // ... 
5656 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5657 // ... 5658 5659 ASTContext &C = CGM.getContext(); 5660 5661 if (SimpleReduction) { 5662 CodeGenFunction::RunCleanupsScope Scope(CGF); 5663 auto IPriv = Privates.begin(); 5664 auto ILHS = LHSExprs.begin(); 5665 auto IRHS = RHSExprs.begin(); 5666 for (const Expr *E : ReductionOps) { 5667 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5668 cast<DeclRefExpr>(*IRHS)); 5669 ++IPriv; 5670 ++ILHS; 5671 ++IRHS; 5672 } 5673 return; 5674 } 5675 5676 // 1. Build a list of reduction variables. 5677 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5678 auto Size = RHSExprs.size(); 5679 for (const Expr *E : Privates) { 5680 if (E->getType()->isVariablyModifiedType()) 5681 // Reserve place for array size. 5682 ++Size; 5683 } 5684 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5685 QualType ReductionArrayTy = 5686 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 5687 /*IndexTypeQuals=*/0); 5688 Address ReductionList = 5689 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5690 auto IPriv = Privates.begin(); 5691 unsigned Idx = 0; 5692 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5693 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5694 CGF.Builder.CreateStore( 5695 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5696 CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), 5697 Elem); 5698 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5699 // Store array size. 5700 ++Idx; 5701 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5702 llvm::Value *Size = CGF.Builder.CreateIntCast( 5703 CGF.getVLASize( 5704 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5705 .NumElts, 5706 CGF.SizeTy, /*isSigned=*/false); 5707 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5708 Elem); 5709 } 5710 } 5711 5712 // 2. Emit reduce_func(). 
5713 llvm::Function *ReductionFn = emitReductionFunction( 5714 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5715 LHSExprs, RHSExprs, ReductionOps); 5716 5717 // 3. Create static kmp_critical_name lock = { 0 }; 5718 std::string Name = getName({"reduction"}); 5719 llvm::Value *Lock = getCriticalRegionLock(Name); 5720 5721 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5722 // RedList, reduce_func, &<lock>); 5723 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5724 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5725 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5726 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5727 ReductionList.getPointer(), CGF.VoidPtrTy); 5728 llvm::Value *Args[] = { 5729 IdentTLoc, // ident_t *<loc> 5730 ThreadId, // i32 <gtid> 5731 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5732 ReductionArrayTySize, // size_type sizeof(RedList) 5733 RL, // void *RedList 5734 ReductionFn, // void (*) (void *, void *) <reduce_func> 5735 Lock // kmp_critical_name *&<lock> 5736 }; 5737 llvm::Value *Res = CGF.EmitRuntimeCall( 5738 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 5739 : OMPRTL__kmpc_reduce), 5740 Args); 5741 5742 // 5. Build switch(res) 5743 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5744 llvm::SwitchInst *SwInst = 5745 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5746 5747 // 6. Build case 1: 5748 // ... 5749 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5750 // ... 
5751 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5752 // break; 5753 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5754 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5755 CGF.EmitBlock(Case1BB); 5756 5757 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5758 llvm::Value *EndArgs[] = { 5759 IdentTLoc, // ident_t *<loc> 5760 ThreadId, // i32 <gtid> 5761 Lock // kmp_critical_name *&<lock> 5762 }; 5763 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5764 CodeGenFunction &CGF, PrePostActionTy &Action) { 5765 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5766 auto IPriv = Privates.begin(); 5767 auto ILHS = LHSExprs.begin(); 5768 auto IRHS = RHSExprs.begin(); 5769 for (const Expr *E : ReductionOps) { 5770 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5771 cast<DeclRefExpr>(*IRHS)); 5772 ++IPriv; 5773 ++ILHS; 5774 ++IRHS; 5775 } 5776 }; 5777 RegionCodeGenTy RCG(CodeGen); 5778 CommonActionTy Action( 5779 nullptr, llvm::None, 5780 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait 5781 : OMPRTL__kmpc_end_reduce), 5782 EndArgs); 5783 RCG.setAction(Action); 5784 RCG(CGF); 5785 5786 CGF.EmitBranch(DefaultBB); 5787 5788 // 7. Build case 2: 5789 // ... 5790 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5791 // ... 
5792 // break; 5793 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5794 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5795 CGF.EmitBlock(Case2BB); 5796 5797 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5798 CodeGenFunction &CGF, PrePostActionTy &Action) { 5799 auto ILHS = LHSExprs.begin(); 5800 auto IRHS = RHSExprs.begin(); 5801 auto IPriv = Privates.begin(); 5802 for (const Expr *E : ReductionOps) { 5803 const Expr *XExpr = nullptr; 5804 const Expr *EExpr = nullptr; 5805 const Expr *UpExpr = nullptr; 5806 BinaryOperatorKind BO = BO_Comma; 5807 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5808 if (BO->getOpcode() == BO_Assign) { 5809 XExpr = BO->getLHS(); 5810 UpExpr = BO->getRHS(); 5811 } 5812 } 5813 // Try to emit update expression as a simple atomic. 5814 const Expr *RHSExpr = UpExpr; 5815 if (RHSExpr) { 5816 // Analyze RHS part of the whole expression. 5817 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5818 RHSExpr->IgnoreParenImpCasts())) { 5819 // If this is a conditional operator, analyze its condition for 5820 // min/max reduction operator. 
5821 RHSExpr = ACO->getCond(); 5822 } 5823 if (const auto *BORHS = 5824 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5825 EExpr = BORHS->getRHS(); 5826 BO = BORHS->getOpcode(); 5827 } 5828 } 5829 if (XExpr) { 5830 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5831 auto &&AtomicRedGen = [BO, VD, 5832 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5833 const Expr *EExpr, const Expr *UpExpr) { 5834 LValue X = CGF.EmitLValue(XExpr); 5835 RValue E; 5836 if (EExpr) 5837 E = CGF.EmitAnyExpr(EExpr); 5838 CGF.EmitOMPAtomicSimpleUpdateExpr( 5839 X, E, BO, /*IsXLHSInRHSPart=*/true, 5840 llvm::AtomicOrdering::Monotonic, Loc, 5841 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5842 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5843 PrivateScope.addPrivate( 5844 VD, [&CGF, VD, XRValue, Loc]() { 5845 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5846 CGF.emitOMPSimpleStore( 5847 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5848 VD->getType().getNonReferenceType(), Loc); 5849 return LHSTemp; 5850 }); 5851 (void)PrivateScope.Privatize(); 5852 return CGF.EmitAnyExpr(UpExpr); 5853 }); 5854 }; 5855 if ((*IPriv)->getType()->isArrayType()) { 5856 // Emit atomic reduction for array section. 5857 const auto *RHSVar = 5858 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5859 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5860 AtomicRedGen, XExpr, EExpr, UpExpr); 5861 } else { 5862 // Emit atomic reduction for array subscript or single variable. 5863 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5864 } 5865 } else { 5866 // Emit as a critical region. 
5867 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5868 const Expr *, const Expr *) { 5869 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5870 std::string Name = RT.getName({"atomic_reduction"}); 5871 RT.emitCriticalRegion( 5872 CGF, Name, 5873 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5874 Action.Enter(CGF); 5875 emitReductionCombiner(CGF, E); 5876 }, 5877 Loc); 5878 }; 5879 if ((*IPriv)->getType()->isArrayType()) { 5880 const auto *LHSVar = 5881 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5882 const auto *RHSVar = 5883 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5884 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5885 CritRedGen); 5886 } else { 5887 CritRedGen(CGF, nullptr, nullptr, nullptr); 5888 } 5889 } 5890 ++ILHS; 5891 ++IRHS; 5892 ++IPriv; 5893 } 5894 }; 5895 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5896 if (!WithNowait) { 5897 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5898 llvm::Value *EndArgs[] = { 5899 IdentTLoc, // ident_t *<loc> 5900 ThreadId, // i32 <gtid> 5901 Lock // kmp_critical_name *&<lock> 5902 }; 5903 CommonActionTy Action(nullptr, llvm::None, 5904 createRuntimeFunction(OMPRTL__kmpc_end_reduce), 5905 EndArgs); 5906 AtomicRCG.setAction(Action); 5907 AtomicRCG(CGF); 5908 } else { 5909 AtomicRCG(CGF); 5910 } 5911 5912 CGF.EmitBranch(DefaultBB); 5913 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5914 } 5915 5916 /// Generates unique name for artificial threadprivate variables. 5917 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5918 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5919 const Expr *Ref) { 5920 SmallString<256> Buffer; 5921 llvm::raw_svector_ostream Out(Buffer); 5922 const clang::DeclRefExpr *DE; 5923 const VarDecl *D = ::getBaseDecl(Ref, DE); 5924 if (!D) 5925 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5926 D = D->getCanonicalDecl(); 5927 std::string Name = CGM.getOpenMPRuntime().getName( 5928 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5929 Out << Prefix << Name << "_" 5930 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5931 return Out.str(); 5932 } 5933 5934 /// Emits reduction initializer function: 5935 /// \code 5936 /// void @.red_init(void* %arg) { 5937 /// %0 = bitcast void* %arg to <type>* 5938 /// store <type> <init>, <type>* %0 5939 /// ret void 5940 /// } 5941 /// \endcode 5942 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5943 SourceLocation Loc, 5944 ReductionCodeGen &RCG, unsigned N) { 5945 ASTContext &C = CGM.getContext(); 5946 FunctionArgList Args; 5947 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5948 ImplicitParamDecl::Other); 5949 Args.emplace_back(&Param); 5950 const auto &FnInfo = 5951 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5952 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5953 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5954 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5955 Name, &CGM.getModule()); 5956 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5957 Fn->setDoesNotRecurse(); 5958 CodeGenFunction CGF(CGM); 5959 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5960 Address PrivateAddr = CGF.EmitLoadOfPointer( 5961 CGF.GetAddrOfLocalVar(&Param), 5962 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5963 llvm::Value *Size = nullptr; 5964 
// If the size of the reduction item is non-constant, load it from global 5965 // threadprivate variable. 5966 if (RCG.getSizes(N).second) { 5967 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5968 CGF, CGM.getContext().getSizeType(), 5969 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5970 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5971 CGM.getContext().getSizeType(), Loc); 5972 } 5973 RCG.emitAggregateType(CGF, N, Size); 5974 LValue SharedLVal; 5975 // If initializer uses initializer from declare reduction construct, emit a 5976 // pointer to the address of the original reduction item (reuired by reduction 5977 // initializer) 5978 if (RCG.usesReductionInitializer(N)) { 5979 Address SharedAddr = 5980 CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5981 CGF, CGM.getContext().VoidPtrTy, 5982 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 5983 SharedAddr = CGF.EmitLoadOfPointer( 5984 SharedAddr, 5985 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 5986 SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 5987 } else { 5988 SharedLVal = CGF.MakeNaturalAlignAddrLValue( 5989 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 5990 CGM.getContext().VoidPtrTy); 5991 } 5992 // Emit the initializer: 5993 // %0 = bitcast void* %arg to <type>* 5994 // store <type> <init>, <type>* %0 5995 RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal, 5996 [](CodeGenFunction &) { return false; }); 5997 CGF.FinishFunction(); 5998 return Fn; 5999 } 6000 6001 /// Emits reduction combiner function: 6002 /// \code 6003 /// void @.red_comb(void* %arg0, void* %arg1) { 6004 /// %lhs = bitcast void* %arg0 to <type>* 6005 /// %rhs = bitcast void* %arg1 to <type>* 6006 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 6007 /// store <type> %2, <type>* %lhs 6008 /// ret void 6009 /// } 6010 /// \endcode 6011 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 6012 
SourceLocation Loc, 6013 ReductionCodeGen &RCG, unsigned N, 6014 const Expr *ReductionOp, 6015 const Expr *LHS, const Expr *RHS, 6016 const Expr *PrivateRef) { 6017 ASTContext &C = CGM.getContext(); 6018 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 6019 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 6020 FunctionArgList Args; 6021 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 6022 C.VoidPtrTy, ImplicitParamDecl::Other); 6023 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6024 ImplicitParamDecl::Other); 6025 Args.emplace_back(&ParamInOut); 6026 Args.emplace_back(&ParamIn); 6027 const auto &FnInfo = 6028 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6029 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6030 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 6031 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6032 Name, &CGM.getModule()); 6033 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6034 Fn->setDoesNotRecurse(); 6035 CodeGenFunction CGF(CGM); 6036 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6037 llvm::Value *Size = nullptr; 6038 // If the size of the reduction item is non-constant, load it from global 6039 // threadprivate variable. 6040 if (RCG.getSizes(N).second) { 6041 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6042 CGF, CGM.getContext().getSizeType(), 6043 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6044 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6045 CGM.getContext().getSizeType(), Loc); 6046 } 6047 RCG.emitAggregateType(CGF, N, Size); 6048 // Remap lhs and rhs variables to the addresses of the function arguments. 
6049 // %lhs = bitcast void* %arg0 to <type>* 6050 // %rhs = bitcast void* %arg1 to <type>* 6051 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 6052 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 6053 // Pull out the pointer to the variable. 6054 Address PtrAddr = CGF.EmitLoadOfPointer( 6055 CGF.GetAddrOfLocalVar(&ParamInOut), 6056 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6057 return CGF.Builder.CreateElementBitCast( 6058 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 6059 }); 6060 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 6061 // Pull out the pointer to the variable. 6062 Address PtrAddr = CGF.EmitLoadOfPointer( 6063 CGF.GetAddrOfLocalVar(&ParamIn), 6064 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6065 return CGF.Builder.CreateElementBitCast( 6066 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 6067 }); 6068 PrivateScope.Privatize(); 6069 // Emit the combiner body: 6070 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 6071 // store <type> %2, <type>* %lhs 6072 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 6073 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 6074 cast<DeclRefExpr>(RHS)); 6075 CGF.FinishFunction(); 6076 return Fn; 6077 } 6078 6079 /// Emits reduction finalizer function: 6080 /// \code 6081 /// void @.red_fini(void* %arg) { 6082 /// %0 = bitcast void* %arg to <type>* 6083 /// <destroy>(<type>* %0) 6084 /// ret void 6085 /// } 6086 /// \endcode 6087 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 6088 SourceLocation Loc, 6089 ReductionCodeGen &RCG, unsigned N) { 6090 if (!RCG.needCleanups(N)) 6091 return nullptr; 6092 ASTContext &C = CGM.getContext(); 6093 FunctionArgList Args; 6094 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6095 ImplicitParamDecl::Other); 6096 Args.emplace_back(&Param); 6097 const auto &FnInfo = 6098 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6099 llvm::FunctionType *FnTy = 
CGM.getTypes().GetFunctionType(FnInfo); 6100 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6101 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6102 Name, &CGM.getModule()); 6103 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6104 Fn->setDoesNotRecurse(); 6105 CodeGenFunction CGF(CGM); 6106 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6107 Address PrivateAddr = CGF.EmitLoadOfPointer( 6108 CGF.GetAddrOfLocalVar(&Param), 6109 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6110 llvm::Value *Size = nullptr; 6111 // If the size of the reduction item is non-constant, load it from global 6112 // threadprivate variable. 6113 if (RCG.getSizes(N).second) { 6114 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6115 CGF, CGM.getContext().getSizeType(), 6116 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6117 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6118 CGM.getContext().getSizeType(), Loc); 6119 } 6120 RCG.emitAggregateType(CGF, N, Size); 6121 // Emit the finalizer body: 6122 // <destroy>(<type>* %0) 6123 RCG.emitCleanups(CGF, N, PrivateAddr); 6124 CGF.FinishFunction(); 6125 return Fn; 6126 } 6127 6128 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6129 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6130 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6131 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6132 return nullptr; 6133 6134 // Build typedef struct: 6135 // kmp_task_red_input { 6136 // void *reduce_shar; // shared reduction item 6137 // size_t reduce_size; // size of data item 6138 // void *reduce_init; // data initialization routine 6139 // void *reduce_fini; // data finalization routine 6140 // void *reduce_comb; // data combiner routine 6141 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6142 // } kmp_task_red_input_t; 6143 
ASTContext &C = CGM.getContext(); 6144 RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t"); 6145 RD->startDefinition(); 6146 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6147 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6148 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6149 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6150 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6151 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6152 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6153 RD->completeDefinition(); 6154 QualType RDType = C.getRecordType(RD); 6155 unsigned Size = Data.ReductionVars.size(); 6156 llvm::APInt ArraySize(/*numBits=*/64, Size); 6157 QualType ArrayRDType = C.getConstantArrayType( 6158 RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); 6159 // kmp_task_red_input_t .rd_input.[Size]; 6160 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6161 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, 6162 Data.ReductionOps); 6163 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6164 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6165 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6166 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6167 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6168 TaskRedInput.getPointer(), Idxs, 6169 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6170 ".rd_input.gep."); 6171 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6172 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6173 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6174 RCG.emitSharedLValue(CGF, Cnt); 6175 llvm::Value *CastedShared = 6176 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer()); 6177 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6178 RCG.emitAggregateType(CGF, Cnt); 6179 llvm::Value *SizeValInChars; 6180 llvm::Value 
*SizeVal; 6181 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6182 // We use delayed creation/initialization for VLAs, array sections and 6183 // custom reduction initializations. It is required because runtime does not 6184 // provide the way to pass the sizes of VLAs/array sections to 6185 // initializer/combiner/finalizer functions and does not pass the pointer to 6186 // original reduction item to the initializer. Instead threadprivate global 6187 // variables are used to store these values and use them in the functions. 6188 bool DelayedCreation = !!SizeVal; 6189 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6190 /*isSigned=*/false); 6191 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6192 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6193 // ElemLVal.reduce_init = init; 6194 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6195 llvm::Value *InitAddr = 6196 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6197 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6198 DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt); 6199 // ElemLVal.reduce_fini = fini; 6200 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6201 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6202 llvm::Value *FiniAddr = Fini 6203 ? 
CGF.EmitCastToVoidPtr(Fini) 6204 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 6205 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 6206 // ElemLVal.reduce_comb = comb; 6207 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 6208 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 6209 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 6210 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 6211 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 6212 // ElemLVal.flags = 0; 6213 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 6214 if (DelayedCreation) { 6215 CGF.EmitStoreOfScalar( 6216 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true), 6217 FlagsLVal); 6218 } else 6219 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType()); 6220 } 6221 // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void 6222 // *data); 6223 llvm::Value *Args[] = { 6224 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6225 /*isSigned=*/true), 6226 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6227 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 6228 CGM.VoidPtrTy)}; 6229 return CGF.EmitRuntimeCall( 6230 createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args); 6231 } 6232 6233 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 6234 SourceLocation Loc, 6235 ReductionCodeGen &RCG, 6236 unsigned N) { 6237 auto Sizes = RCG.getSizes(N); 6238 // Emit threadprivate global variable if the type is non-constant 6239 // (Sizes.second = nullptr). 
6240 if (Sizes.second) { 6241 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6242 /*isSigned=*/false); 6243 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6244 CGF, CGM.getContext().getSizeType(), 6245 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6246 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6247 } 6248 // Store address of the original reduction item if custom initializer is used. 6249 if (RCG.usesReductionInitializer(N)) { 6250 Address SharedAddr = getAddrOfArtificialThreadPrivate( 6251 CGF, CGM.getContext().VoidPtrTy, 6252 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 6253 CGF.Builder.CreateStore( 6254 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6255 RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy), 6256 SharedAddr, /*IsVolatile=*/false); 6257 } 6258 } 6259 6260 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6261 SourceLocation Loc, 6262 llvm::Value *ReductionsPtr, 6263 LValue SharedLVal) { 6264 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6265 // *d); 6266 llvm::Value *Args[] = { 6267 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6268 /*isSigned=*/true), 6269 ReductionsPtr, 6270 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(), 6271 CGM.VoidPtrTy)}; 6272 return Address( 6273 CGF.EmitRuntimeCall( 6274 createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), 6275 SharedLVal.getAlignment()); 6276 } 6277 6278 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6279 SourceLocation Loc) { 6280 if (!CGF.HaveInsertPoint()) 6281 return; 6282 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6283 // global_tid); 6284 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 6285 // Ignore return result until untied tasks are supported. 
6286 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 6287 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6288 Region->emitUntiedSwitch(CGF); 6289 } 6290 6291 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6292 OpenMPDirectiveKind InnerKind, 6293 const RegionCodeGenTy &CodeGen, 6294 bool HasCancel) { 6295 if (!CGF.HaveInsertPoint()) 6296 return; 6297 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 6298 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6299 } 6300 6301 namespace { 6302 enum RTCancelKind { 6303 CancelNoreq = 0, 6304 CancelParallel = 1, 6305 CancelLoop = 2, 6306 CancelSections = 3, 6307 CancelTaskgroup = 4 6308 }; 6309 } // anonymous namespace 6310 6311 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6312 RTCancelKind CancelKind = CancelNoreq; 6313 if (CancelRegion == OMPD_parallel) 6314 CancelKind = CancelParallel; 6315 else if (CancelRegion == OMPD_for) 6316 CancelKind = CancelLoop; 6317 else if (CancelRegion == OMPD_sections) 6318 CancelKind = CancelSections; 6319 else { 6320 assert(CancelRegion == OMPD_taskgroup); 6321 CancelKind = CancelTaskgroup; 6322 } 6323 return CancelKind; 6324 } 6325 6326 void CGOpenMPRuntime::emitCancellationPointCall( 6327 CodeGenFunction &CGF, SourceLocation Loc, 6328 OpenMPDirectiveKind CancelRegion) { 6329 if (!CGF.HaveInsertPoint()) 6330 return; 6331 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6332 // global_tid, kmp_int32 cncl_kind); 6333 if (auto *OMPRegionInfo = 6334 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6335 // For 'cancellation point taskgroup', the task region info may not have a 6336 // cancel. This may instead happen in another adjacent task. 
6337 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6338 llvm::Value *Args[] = { 6339 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6340 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6341 // Ignore return result until untied tasks are supported. 6342 llvm::Value *Result = CGF.EmitRuntimeCall( 6343 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 6344 // if (__kmpc_cancellationpoint()) { 6345 // exit from construct; 6346 // } 6347 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6348 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6349 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6350 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6351 CGF.EmitBlock(ExitBB); 6352 // exit from construct; 6353 CodeGenFunction::JumpDest CancelDest = 6354 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6355 CGF.EmitBranchThroughCleanup(CancelDest); 6356 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6357 } 6358 } 6359 } 6360 6361 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6362 const Expr *IfCond, 6363 OpenMPDirectiveKind CancelRegion) { 6364 if (!CGF.HaveInsertPoint()) 6365 return; 6366 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6367 // kmp_int32 cncl_kind); 6368 if (auto *OMPRegionInfo = 6369 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6370 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 6371 PrePostActionTy &) { 6372 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6373 llvm::Value *Args[] = { 6374 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6375 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6376 // Ignore return result until untied tasks are supported. 
6377 llvm::Value *Result = CGF.EmitRuntimeCall( 6378 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 6379 // if (__kmpc_cancel()) { 6380 // exit from construct; 6381 // } 6382 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6383 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6384 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6385 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6386 CGF.EmitBlock(ExitBB); 6387 // exit from construct; 6388 CodeGenFunction::JumpDest CancelDest = 6389 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6390 CGF.EmitBranchThroughCleanup(CancelDest); 6391 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6392 }; 6393 if (IfCond) { 6394 emitOMPIfClause(CGF, IfCond, ThenGen, 6395 [](CodeGenFunction &, PrePostActionTy &) {}); 6396 } else { 6397 RegionCodeGenTy ThenRCG(ThenGen); 6398 ThenRCG(CGF); 6399 } 6400 } 6401 } 6402 6403 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6404 const OMPExecutableDirective &D, StringRef ParentName, 6405 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6406 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6407 assert(!ParentName.empty() && "Invalid target region parent name!"); 6408 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6409 IsOffloadEntry, CodeGen); 6410 } 6411 6412 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6413 const OMPExecutableDirective &D, StringRef ParentName, 6414 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6415 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6416 // Create a unique name for the entry function using the source location 6417 // information of the current target region. 
The name will be something like: 6418 // 6419 // __omp_offloading_DD_FFFF_PP_lBB 6420 // 6421 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6422 // mangled name of the function that encloses the target region and BB is the 6423 // line number of the target region. 6424 6425 unsigned DeviceID; 6426 unsigned FileID; 6427 unsigned Line; 6428 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6429 Line); 6430 SmallString<64> EntryFnName; 6431 { 6432 llvm::raw_svector_ostream OS(EntryFnName); 6433 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6434 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6435 } 6436 6437 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6438 6439 CodeGenFunction CGF(CGM, true); 6440 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6441 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6442 6443 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); 6444 6445 // If this target outline function is not an offload entry, we don't need to 6446 // register it. 6447 if (!IsOffloadEntry) 6448 return; 6449 6450 // The target region ID is used by the runtime library to identify the current 6451 // target region, so it only has to be unique and not necessarily point to 6452 // anything. It could be the pointer to the outlined function that implements 6453 // the target region, but we aren't using that so that the compiler doesn't 6454 // need to keep that, and could therefore inline the host function if proven 6455 // worthwhile during optimization. In the other hand, if emitting code for the 6456 // device, the ID has to be the function address so that it can retrieved from 6457 // the offloading entry and launched by the runtime library. We also mark the 6458 // outlined function to have external linkage in case we are emitting code for 6459 // the device, because these functions will be entry points to the device. 
6460 6461 if (CGM.getLangOpts().OpenMPIsDevice) { 6462 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6463 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6464 OutlinedFn->setDSOLocal(false); 6465 } else { 6466 std::string Name = getName({EntryFnName, "region_id"}); 6467 OutlinedFnID = new llvm::GlobalVariable( 6468 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6469 llvm::GlobalValue::WeakAnyLinkage, 6470 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6471 } 6472 6473 // Register the information for the entry associated with this target region. 6474 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6475 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6476 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6477 } 6478 6479 /// Checks if the expression is constant or does not have non-trivial function 6480 /// calls. 6481 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6482 // We can skip constant expressions. 6483 // We can skip expressions with trivial calls or simple expressions. 6484 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6485 !E->hasNonTrivialCall(Ctx)) && 6486 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6487 } 6488 6489 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6490 const Stmt *Body) { 6491 const Stmt *Child = Body->IgnoreContainers(); 6492 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6493 Child = nullptr; 6494 for (const Stmt *S : C->body()) { 6495 if (const auto *E = dyn_cast<Expr>(S)) { 6496 if (isTrivial(Ctx, E)) 6497 continue; 6498 } 6499 // Some of the statements can be ignored. 6500 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6501 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6502 continue; 6503 // Analyze declarations. 
6504 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6505 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6506 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6507 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6508 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6509 isa<UsingDirectiveDecl>(D) || 6510 isa<OMPDeclareReductionDecl>(D) || 6511 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6512 return true; 6513 const auto *VD = dyn_cast<VarDecl>(D); 6514 if (!VD) 6515 return false; 6516 return VD->isConstexpr() || 6517 ((VD->getType().isTrivialType(Ctx) || 6518 VD->getType()->isReferenceType()) && 6519 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6520 })) 6521 continue; 6522 } 6523 // Found multiple children - cannot get the one child only. 6524 if (Child) 6525 return nullptr; 6526 Child = S; 6527 } 6528 if (Child) 6529 Child = Child->IgnoreContainers(); 6530 } 6531 return Child; 6532 } 6533 6534 /// Emit the number of teams for a target directive. Inspect the num_teams 6535 /// clause associated with a teams construct combined or closely nested 6536 /// with the target directive. 6537 /// 6538 /// Emit a team of size one for directives such as 'target parallel' that 6539 /// have no associated teams construct. 6540 /// 6541 /// Otherwise, return nullptr. 
6542 static llvm::Value * 6543 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6544 const OMPExecutableDirective &D) { 6545 assert(!CGF.getLangOpts().OpenMPIsDevice && 6546 "Clauses associated with the teams directive expected to be emitted " 6547 "only for the host!"); 6548 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6549 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6550 "Expected target-based executable directive."); 6551 CGBuilderTy &Bld = CGF.Builder; 6552 switch (DirectiveKind) { 6553 case OMPD_target: { 6554 const auto *CS = D.getInnermostCapturedStmt(); 6555 const auto *Body = 6556 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6557 const Stmt *ChildStmt = 6558 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6559 if (const auto *NestedDir = 6560 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6561 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6562 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6563 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6564 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6565 const Expr *NumTeams = 6566 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6567 llvm::Value *NumTeamsVal = 6568 CGF.EmitScalarExpr(NumTeams, 6569 /*IgnoreResultAssign*/ true); 6570 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6571 /*IsSigned=*/true); 6572 } 6573 return Bld.getInt32(0); 6574 } 6575 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6576 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6577 return Bld.getInt32(1); 6578 return Bld.getInt32(0); 6579 } 6580 return nullptr; 6581 } 6582 case OMPD_target_teams: 6583 case OMPD_target_teams_distribute: 6584 case OMPD_target_teams_distribute_simd: 6585 case OMPD_target_teams_distribute_parallel_for: 6586 case OMPD_target_teams_distribute_parallel_for_simd: { 6587 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6588 CodeGenFunction::RunCleanupsScope 
NumTeamsScope(CGF); 6589 const Expr *NumTeams = 6590 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6591 llvm::Value *NumTeamsVal = 6592 CGF.EmitScalarExpr(NumTeams, 6593 /*IgnoreResultAssign*/ true); 6594 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6595 /*IsSigned=*/true); 6596 } 6597 return Bld.getInt32(0); 6598 } 6599 case OMPD_target_parallel: 6600 case OMPD_target_parallel_for: 6601 case OMPD_target_parallel_for_simd: 6602 case OMPD_target_simd: 6603 return Bld.getInt32(1); 6604 case OMPD_parallel: 6605 case OMPD_for: 6606 case OMPD_parallel_for: 6607 case OMPD_parallel_sections: 6608 case OMPD_for_simd: 6609 case OMPD_parallel_for_simd: 6610 case OMPD_cancel: 6611 case OMPD_cancellation_point: 6612 case OMPD_ordered: 6613 case OMPD_threadprivate: 6614 case OMPD_allocate: 6615 case OMPD_task: 6616 case OMPD_simd: 6617 case OMPD_sections: 6618 case OMPD_section: 6619 case OMPD_single: 6620 case OMPD_master: 6621 case OMPD_critical: 6622 case OMPD_taskyield: 6623 case OMPD_barrier: 6624 case OMPD_taskwait: 6625 case OMPD_taskgroup: 6626 case OMPD_atomic: 6627 case OMPD_flush: 6628 case OMPD_teams: 6629 case OMPD_target_data: 6630 case OMPD_target_exit_data: 6631 case OMPD_target_enter_data: 6632 case OMPD_distribute: 6633 case OMPD_distribute_simd: 6634 case OMPD_distribute_parallel_for: 6635 case OMPD_distribute_parallel_for_simd: 6636 case OMPD_teams_distribute: 6637 case OMPD_teams_distribute_simd: 6638 case OMPD_teams_distribute_parallel_for: 6639 case OMPD_teams_distribute_parallel_for_simd: 6640 case OMPD_target_update: 6641 case OMPD_declare_simd: 6642 case OMPD_declare_target: 6643 case OMPD_end_declare_target: 6644 case OMPD_declare_reduction: 6645 case OMPD_declare_mapper: 6646 case OMPD_taskloop: 6647 case OMPD_taskloop_simd: 6648 case OMPD_requires: 6649 case OMPD_unknown: 6650 break; 6651 } 6652 llvm_unreachable("Unexpected directive kind."); 6653 } 6654 6655 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt 
*CS, 6656 llvm::Value *DefaultThreadLimitVal) { 6657 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6658 CGF.getContext(), CS->getCapturedStmt()); 6659 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6660 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6661 llvm::Value *NumThreads = nullptr; 6662 llvm::Value *CondVal = nullptr; 6663 // Handle if clause. If if clause present, the number of threads is 6664 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6665 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6666 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6667 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6668 const OMPIfClause *IfClause = nullptr; 6669 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6670 if (C->getNameModifier() == OMPD_unknown || 6671 C->getNameModifier() == OMPD_parallel) { 6672 IfClause = C; 6673 break; 6674 } 6675 } 6676 if (IfClause) { 6677 const Expr *Cond = IfClause->getCondition(); 6678 bool Result; 6679 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6680 if (!Result) 6681 return CGF.Builder.getInt32(1); 6682 } else { 6683 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6684 if (const auto *PreInit = 6685 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6686 for (const auto *I : PreInit->decls()) { 6687 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6688 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6689 } else { 6690 CodeGenFunction::AutoVarEmission Emission = 6691 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6692 CGF.EmitAutoVarCleanups(Emission); 6693 } 6694 } 6695 } 6696 CondVal = CGF.EvaluateExprAsBool(Cond); 6697 } 6698 } 6699 } 6700 // Check the value of num_threads clause iff if clause was not specified 6701 // or is not evaluated to false. 
6702 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6703 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6704 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6705 const auto *NumThreadsClause = 6706 Dir->getSingleClause<OMPNumThreadsClause>(); 6707 CodeGenFunction::LexicalScope Scope( 6708 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6709 if (const auto *PreInit = 6710 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6711 for (const auto *I : PreInit->decls()) { 6712 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6713 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6714 } else { 6715 CodeGenFunction::AutoVarEmission Emission = 6716 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6717 CGF.EmitAutoVarCleanups(Emission); 6718 } 6719 } 6720 } 6721 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6722 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6723 /*IsSigned=*/false); 6724 if (DefaultThreadLimitVal) 6725 NumThreads = CGF.Builder.CreateSelect( 6726 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6727 DefaultThreadLimitVal, NumThreads); 6728 } else { 6729 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6730 : CGF.Builder.getInt32(0); 6731 } 6732 // Process condition of the if clause. 6733 if (CondVal) { 6734 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6735 CGF.Builder.getInt32(1)); 6736 } 6737 return NumThreads; 6738 } 6739 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6740 return CGF.Builder.getInt32(1); 6741 return DefaultThreadLimitVal; 6742 } 6743 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6744 : CGF.Builder.getInt32(0); 6745 } 6746 6747 /// Emit the number of threads for a target directive. Inspect the 6748 /// thread_limit clause associated with a teams construct combined or closely 6749 /// nested with the target directive. 
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look at the directive nested directly in the region.
    // ThreadLimitVal is still null here, so getNumThreads returns null only
    // when the single child is an OpenMP directive that is neither parallel
    // nor simd; that is the case handled below.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Pick up a thread_limit clause on the nested directive, evaluating it
      // in the context of the target region's captured statement.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit the clause's pre-init declarations before its expression.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
      }
      // A teams directive that is not combined with distribute: descend one
      // level and continue with its single nested directive (Dir may become
      // null here). Note that this inner 'Child' shadows the outer one.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A non-simd distribute directive: inspect what is nested inside it,
      // bounded by the thread limit found so far.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // The thread_limit clause, if any, is on the combined directive itself.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    // Look one level deeper through a nested plain 'distribute'.
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
    }
    // NOTE(review): getNumThreads can return ThreadLimitVal unchanged, which
    // is null when there is no thread_limit clause and the nested directive
    // is neither parallel nor simd - confirm callers tolerate a null result.
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Only an 'if' clause without a name modifier or with the 'parallel'
      // modifier applies; take the first such clause.
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant-false condition: a single thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/false);
      // Result is min(num_threads, thread_limit) as unsigned values, or just
      // num_threads when there is no thread_limit clause.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // All remaining kinds are not target execution directives; they are listed
  // explicitly (no 'default') and fall through to llvm_unreachable below.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Access the wrapped base pointer value.
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
  };

private:
  /// Information gathered for one mappable-expression component list: the
  /// components themselves, the map type and modifiers, whether the device
  /// pointer has to be returned, and whether the map is implicit.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };

  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };

  /// Directive from where the map clauses were extracted.
  const OMPExecutableDirective &CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Emit the size in bytes of the storage designated by the mappable
  /// expression \p E. References are looked through; for array sections the
  /// size is derived from the section length rather than from the type of
  /// the expression.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression, that means we
      // are using the whole length of the base.
      if (!OAE->getLength() && OAE->getColonLoc().isValid())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        // NOTE(review): cast<> is expected to assert on a type mismatch
        // itself, so the null check below looks redundant - confirm.
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength())
        return ElemSize;

      // Size of the section is length * element size, computed in size_t.
      llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
      LengthVal =
          CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
      return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
7132 OpenMPOffloadMappingFlags getMapTypeBits( 7133 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7134 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { 7135 OpenMPOffloadMappingFlags Bits = 7136 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7137 switch (MapType) { 7138 case OMPC_MAP_alloc: 7139 case OMPC_MAP_release: 7140 // alloc and release is the default behavior in the runtime library, i.e. 7141 // if we don't pass any bits alloc/release that is what the runtime is 7142 // going to do. Therefore, we don't need to signal anything for these two 7143 // type modifiers. 7144 break; 7145 case OMPC_MAP_to: 7146 Bits |= OMP_MAP_TO; 7147 break; 7148 case OMPC_MAP_from: 7149 Bits |= OMP_MAP_FROM; 7150 break; 7151 case OMPC_MAP_tofrom: 7152 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7153 break; 7154 case OMPC_MAP_delete: 7155 Bits |= OMP_MAP_DELETE; 7156 break; 7157 case OMPC_MAP_unknown: 7158 llvm_unreachable("Unexpected map type!"); 7159 } 7160 if (AddPtrFlag) 7161 Bits |= OMP_MAP_PTR_AND_OBJ; 7162 if (AddIsTargetParamFlag) 7163 Bits |= OMP_MAP_TARGET_PARAM; 7164 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7165 != MapModifiers.end()) 7166 Bits |= OMP_MAP_ALWAYS; 7167 return Bits; 7168 } 7169 7170 /// Return true if the provided expression is a final array section. A 7171 /// final array section, is one whose length can't be proved to be one. 7172 bool isFinalArraySectionExpression(const Expr *E) const { 7173 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7174 7175 // It is not an array section and therefore not a unity-size one. 7176 if (!OASE) 7177 return false; 7178 7179 // An array section with no colon always refer to a single element. 7180 if (OASE->getColonLoc().isInvalid()) 7181 return false; 7182 7183 const Expr *Length = OASE->getLength(); 7184 7185 // If we don't have a length we have to check if the array has size 1 7186 // for this dimension. 
Also, we should always expect a length if the 7187 // base type is pointer. 7188 if (!Length) { 7189 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7190 OASE->getBase()->IgnoreParenImpCasts()) 7191 .getCanonicalType(); 7192 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7193 return ATy->getSize().getSExtValue() != 1; 7194 // If we don't have a constant dimension length, we have to consider 7195 // the current section as having any size, so it is not necessarily 7196 // unitary. If it happen to be unity size, that's user fault. 7197 return true; 7198 } 7199 7200 // Check if the length evaluates to 1. 7201 Expr::EvalResult Result; 7202 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7203 return true; // Can have more that size 1. 7204 7205 llvm::APSInt ConstLength = Result.Val.getInt(); 7206 return ConstLength.getSExtValue() != 1; 7207 } 7208 7209 /// Generate the base pointers, section pointers, sizes and map type 7210 /// bits for the provided map type, map modifier, and expression components. 7211 /// \a IsFirstComponent should be set to true if the provided set of 7212 /// components is the first associated with a capture. 7213 void generateInfoForComponentList( 7214 OpenMPMapClauseKind MapType, 7215 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7216 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7217 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 7218 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 7219 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, 7220 bool IsImplicit, 7221 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7222 OverlappedElements = llvm::None) const { 7223 // The following summarizes what has to be generated for each map and the 7224 // types below. The generated information is expressed in this order: 7225 // base pointer, section pointer, size, flags 7226 // (to add to the ones that come from the map type and modifier). 
7227 // 7228 // double d; 7229 // int i[100]; 7230 // float *p; 7231 // 7232 // struct S1 { 7233 // int i; 7234 // float f[50]; 7235 // } 7236 // struct S2 { 7237 // int i; 7238 // float f[50]; 7239 // S1 s; 7240 // double *p; 7241 // struct S2 *ps; 7242 // } 7243 // S2 s; 7244 // S2 *ps; 7245 // 7246 // map(d) 7247 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7248 // 7249 // map(i) 7250 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7251 // 7252 // map(i[1:23]) 7253 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7254 // 7255 // map(p) 7256 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7257 // 7258 // map(p[1:24]) 7259 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7260 // 7261 // map(s) 7262 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7263 // 7264 // map(s.i) 7265 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7266 // 7267 // map(s.s.f) 7268 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7269 // 7270 // map(s.p) 7271 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7272 // 7273 // map(to: s.p[:22]) 7274 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7275 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7276 // &(s.p), &(s.p[0]), 22*sizeof(double), 7277 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7278 // (*) alloc space for struct members, only this is a target parameter 7279 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7280 // optimizes this entry out, same in the examples below) 7281 // (***) map the pointee (map: to) 7282 // 7283 // map(s.ps) 7284 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7285 // 7286 // map(from: s.ps->s.i) 7287 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7288 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7289 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7290 // 7291 // map(to: s.ps->ps) 7292 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7293 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7294 // &(s.ps), 
&(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7295 // 7296 // map(s.ps->ps->ps) 7297 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7298 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7299 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7300 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7301 // 7302 // map(to: s.ps->ps->s.f[:22]) 7303 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7304 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7305 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7306 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7307 // 7308 // map(ps) 7309 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7310 // 7311 // map(ps->i) 7312 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7313 // 7314 // map(ps->s.f) 7315 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7316 // 7317 // map(from: ps->p) 7318 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7319 // 7320 // map(to: ps->p[:22]) 7321 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7322 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7323 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7324 // 7325 // map(ps->ps) 7326 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7327 // 7328 // map(from: ps->ps->s.i) 7329 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7330 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7331 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7332 // 7333 // map(from: ps->ps->ps) 7334 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7335 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7336 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7337 // 7338 // map(ps->ps->ps->ps) 7339 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7340 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7341 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7342 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | 
TO | FROM 7343 // 7344 // map(to: ps->ps->ps->s.f[:22]) 7345 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7346 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7347 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7348 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7349 // 7350 // map(to: s.f[:22]) map(from: s.p[:33]) 7351 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7352 // sizeof(double*) (**), TARGET_PARAM 7353 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7354 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7355 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7356 // (*) allocate contiguous space needed to fit all mapped members even if 7357 // we allocate space for members not mapped (in this example, 7358 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7359 // them as well because they fall between &s.f[0] and &s.p) 7360 // 7361 // map(from: s.f[:22]) map(to: ps->p[:33]) 7362 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7363 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7364 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7365 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7366 // (*) the struct this entry pertains to is the 2nd element in the list of 7367 // arguments, hence MEMBER_OF(2) 7368 // 7369 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7370 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7371 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7372 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7373 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7374 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7375 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7376 // (*) the struct this entry pertains to is the 4th element in the list 7377 // of arguments, hence MEMBER_OF(4) 7378 7379 // Track if the map information being generated is the first for a 
capture. 7380 bool IsCaptureFirstInfo = IsFirstComponentList; 7381 bool IsLink = false; // Is this variable a "declare target link"? 7382 7383 // Scan the components from the base to the complete expression. 7384 auto CI = Components.rbegin(); 7385 auto CE = Components.rend(); 7386 auto I = CI; 7387 7388 // Track if the map information being generated is the first for a list of 7389 // components. 7390 bool IsExpressionFirstInfo = true; 7391 Address BP = Address::invalid(); 7392 const Expr *AssocExpr = I->getAssociatedExpression(); 7393 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7394 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7395 7396 if (isa<MemberExpr>(AssocExpr)) { 7397 // The base is the 'this' pointer. The content of the pointer is going 7398 // to be the base of the field being mapped. 7399 BP = CGF.LoadCXXThisAddress(); 7400 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7401 (OASE && 7402 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7403 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); 7404 } else { 7405 // The base is the reference to the variable. 7406 // BP = &Var. 7407 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); 7408 if (const auto *VD = 7409 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7410 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7411 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) 7412 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) { 7413 IsLink = true; 7414 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); 7415 } 7416 } 7417 7418 // If the variable is a pointer and is being dereferenced (i.e. is not 7419 // the last component), the base has to be the pointer itself, not its 7420 // reference. References are ignored for mapping purposes. 
7421 QualType Ty = 7422 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7423 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7424 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7425 7426 // We do not need to generate individual map information for the 7427 // pointer, it can be associated with the combined storage. 7428 ++I; 7429 } 7430 } 7431 7432 // Track whether a component of the list should be marked as MEMBER_OF some 7433 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7434 // in a component list should be marked as MEMBER_OF, all subsequent entries 7435 // do not belong to the base struct. E.g. 7436 // struct S2 s; 7437 // s.ps->ps->ps->f[:] 7438 // (1) (2) (3) (4) 7439 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7440 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7441 // is the pointee of ps(2) which is not member of struct s, so it should not 7442 // be marked as such (it is still PTR_AND_OBJ). 7443 // The variable is initialized to false so that PTR_AND_OBJ entries which 7444 // are not struct members are not considered (e.g. array of pointers to 7445 // data). 7446 bool ShouldBeMemberOf = false; 7447 7448 // Variable keeping track of whether or not we have encountered a component 7449 // in the component list which is a member expression. Useful when we have a 7450 // pointer or a final array section, in which case it is the previous 7451 // component in the list which tells us whether we have a member expression. 7452 // E.g. X.f[:] 7453 // While processing the final array section "[:]" it is "f" which tells us 7454 // whether we are dealing with a member of a declared struct. 7455 const MemberExpr *EncounteredME = nullptr; 7456 7457 for (; I != CE; ++I) { 7458 // If the current component is member of a struct (parent struct) mark it. 
7459 if (!EncounteredME) { 7460 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7461 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7462 // as MEMBER_OF the parent struct. 7463 if (EncounteredME) 7464 ShouldBeMemberOf = true; 7465 } 7466 7467 auto Next = std::next(I); 7468 7469 // We need to generate the addresses and sizes if this is the last 7470 // component, if the component is a pointer or if it is an array section 7471 // whose length can't be proved to be one. If this is a pointer, it 7472 // becomes the base address for the following components. 7473 7474 // A final array section, is one whose length can't be proved to be one. 7475 bool IsFinalArraySection = 7476 isFinalArraySectionExpression(I->getAssociatedExpression()); 7477 7478 // Get information on whether the element is a pointer. Have to do a 7479 // special treatment for array sections given that they are built-in 7480 // types. 7481 const auto *OASE = 7482 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7483 bool IsPointer = 7484 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7485 .getCanonicalType() 7486 ->isAnyPointerType()) || 7487 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7488 7489 if (Next == CE || IsPointer || IsFinalArraySection) { 7490 // If this is not the last component, we expect the pointer to be 7491 // associated with an array expression or member expression. 
7492 assert((Next == CE || 7493 isa<MemberExpr>(Next->getAssociatedExpression()) || 7494 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7495 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && 7496 "Unexpected expression"); 7497 7498 Address LB = 7499 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress(); 7500 7501 // If this component is a pointer inside the base struct then we don't 7502 // need to create any entry for it - it will be combined with the object 7503 // it is pointing to into a single PTR_AND_OBJ entry. 7504 bool IsMemberPointer = 7505 IsPointer && EncounteredME && 7506 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7507 EncounteredME); 7508 if (!OverlappedElements.empty()) { 7509 // Handle base element with the info for overlapped elements. 7510 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7511 assert(Next == CE && 7512 "Expected last element for the overlapped elements."); 7513 assert(!IsPointer && 7514 "Unexpected base element with the pointer type."); 7515 // Mark the whole struct as the struct that requires allocation on the 7516 // device. 7517 PartialStruct.LowestElem = {0, LB}; 7518 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7519 I->getAssociatedExpression()->getType()); 7520 Address HB = CGF.Builder.CreateConstGEP( 7521 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7522 CGF.VoidPtrTy), 7523 TypeSize.getQuantity() - 1); 7524 PartialStruct.HighestElem = { 7525 std::numeric_limits<decltype( 7526 PartialStruct.HighestElem.first)>::max(), 7527 HB}; 7528 PartialStruct.Base = BP; 7529 // Emit data for non-overlapped data. 7530 OpenMPOffloadMappingFlags Flags = 7531 OMP_MAP_MEMBER_OF | 7532 getMapTypeBits(MapType, MapModifiers, IsImplicit, 7533 /*AddPtrFlag=*/false, 7534 /*AddIsTargetParamFlag=*/false); 7535 LB = BP; 7536 llvm::Value *Size = nullptr; 7537 // Do bitcopy of all non-overlapped structure elements. 
7538 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7539 Component : OverlappedElements) { 7540 Address ComponentLB = Address::invalid(); 7541 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7542 Component) { 7543 if (MC.getAssociatedDeclaration()) { 7544 ComponentLB = 7545 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7546 .getAddress(); 7547 Size = CGF.Builder.CreatePtrDiff( 7548 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7549 CGF.EmitCastToVoidPtr(LB.getPointer())); 7550 break; 7551 } 7552 } 7553 BasePointers.push_back(BP.getPointer()); 7554 Pointers.push_back(LB.getPointer()); 7555 Sizes.push_back(Size); 7556 Types.push_back(Flags); 7557 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7558 } 7559 BasePointers.push_back(BP.getPointer()); 7560 Pointers.push_back(LB.getPointer()); 7561 Size = CGF.Builder.CreatePtrDiff( 7562 CGF.EmitCastToVoidPtr( 7563 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7564 CGF.EmitCastToVoidPtr(LB.getPointer())); 7565 Sizes.push_back(Size); 7566 Types.push_back(Flags); 7567 break; 7568 } 7569 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7570 if (!IsMemberPointer) { 7571 BasePointers.push_back(BP.getPointer()); 7572 Pointers.push_back(LB.getPointer()); 7573 Sizes.push_back(Size); 7574 7575 // We need to add a pointer flag for each map that comes from the 7576 // same expression except for the first one. We also need to signal 7577 // this map is the first one that relates with the current capture 7578 // (there is a set of entries for each capture). 7579 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7580 MapType, MapModifiers, IsImplicit, 7581 !IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink); 7582 7583 if (!IsExpressionFirstInfo) { 7584 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7585 // then we reset the TO/FROM/ALWAYS/DELETE flags. 
7586 if (IsPointer) 7587 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7588 OMP_MAP_DELETE); 7589 7590 if (ShouldBeMemberOf) { 7591 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7592 // should be later updated with the correct value of MEMBER_OF. 7593 Flags |= OMP_MAP_MEMBER_OF; 7594 // From now on, all subsequent PTR_AND_OBJ entries should not be 7595 // marked as MEMBER_OF. 7596 ShouldBeMemberOf = false; 7597 } 7598 } 7599 7600 Types.push_back(Flags); 7601 } 7602 7603 // If we have encountered a member expression so far, keep track of the 7604 // mapped member. If the parent is "*this", then the value declaration 7605 // is nullptr. 7606 if (EncounteredME) { 7607 const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl()); 7608 unsigned FieldIndex = FD->getFieldIndex(); 7609 7610 // Update info about the lowest and highest elements for this struct 7611 if (!PartialStruct.Base.isValid()) { 7612 PartialStruct.LowestElem = {FieldIndex, LB}; 7613 PartialStruct.HighestElem = {FieldIndex, LB}; 7614 PartialStruct.Base = BP; 7615 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7616 PartialStruct.LowestElem = {FieldIndex, LB}; 7617 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7618 PartialStruct.HighestElem = {FieldIndex, LB}; 7619 } 7620 } 7621 7622 // If we have a final array section, we are done with this expression. 7623 if (IsFinalArraySection) 7624 break; 7625 7626 // The pointer becomes the base for the next element. 7627 if (Next != CE) 7628 BP = LB; 7629 7630 IsExpressionFirstInfo = false; 7631 IsCaptureFirstInfo = false; 7632 } 7633 } 7634 } 7635 7636 /// Return the adjusted map modifiers if the declaration a capture refers to 7637 /// appears in a first-private clause. This is expected to be used only with 7638 /// directives that start with 'target'. 
7639 MappableExprsHandler::OpenMPOffloadMappingFlags 7640 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 7641 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 7642 7643 // A first private variable captured by reference will use only the 7644 // 'private ptr' and 'map to' flag. Return the right flags if the captured 7645 // declaration is known as first-private in this handler. 7646 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 7647 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 7648 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 7649 return MappableExprsHandler::OMP_MAP_ALWAYS | 7650 MappableExprsHandler::OMP_MAP_TO; 7651 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 7652 return MappableExprsHandler::OMP_MAP_TO | 7653 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 7654 return MappableExprsHandler::OMP_MAP_PRIVATE | 7655 MappableExprsHandler::OMP_MAP_TO; 7656 } 7657 return MappableExprsHandler::OMP_MAP_TO | 7658 MappableExprsHandler::OMP_MAP_FROM; 7659 } 7660 7661 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 7662 // Member of is given by the 16 MSB of the flag, so rotate by 48 bits. 7663 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 7664 << 48); 7665 } 7666 7667 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 7668 OpenMPOffloadMappingFlags MemberOfFlag) { 7669 // If the entry is PTR_AND_OBJ but has not been marked with the special 7670 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 7671 // marked as MEMBER_OF. 7672 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 7673 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 7674 return; 7675 7676 // Reset the placeholder value to prepare the flag for the assignment of the 7677 // proper MEMBER_OF value. 
7678 Flags &= ~OMP_MAP_MEMBER_OF; 7679 Flags |= MemberOfFlag; 7680 } 7681 7682 void getPlainLayout(const CXXRecordDecl *RD, 7683 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 7684 bool AsBase) const { 7685 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 7686 7687 llvm::StructType *St = 7688 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 7689 7690 unsigned NumElements = St->getNumElements(); 7691 llvm::SmallVector< 7692 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 7693 RecordLayout(NumElements); 7694 7695 // Fill bases. 7696 for (const auto &I : RD->bases()) { 7697 if (I.isVirtual()) 7698 continue; 7699 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7700 // Ignore empty bases. 7701 if (Base->isEmpty() || CGF.getContext() 7702 .getASTRecordLayout(Base) 7703 .getNonVirtualSize() 7704 .isZero()) 7705 continue; 7706 7707 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 7708 RecordLayout[FieldIndex] = Base; 7709 } 7710 // Fill in virtual bases. 7711 for (const auto &I : RD->vbases()) { 7712 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7713 // Ignore empty bases. 7714 if (Base->isEmpty()) 7715 continue; 7716 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 7717 if (RecordLayout[FieldIndex]) 7718 continue; 7719 RecordLayout[FieldIndex] = Base; 7720 } 7721 // Fill in all the fields. 7722 assert(!RD->isUnion() && "Unexpected union."); 7723 for (const auto *Field : RD->fields()) { 7724 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 7725 // will fill in later.) 
7726 if (!Field->isBitField()) { 7727 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 7728 RecordLayout[FieldIndex] = Field; 7729 } 7730 } 7731 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 7732 &Data : RecordLayout) { 7733 if (Data.isNull()) 7734 continue; 7735 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 7736 getPlainLayout(Base, Layout, /*AsBase=*/true); 7737 else 7738 Layout.push_back(Data.get<const FieldDecl *>()); 7739 } 7740 } 7741 7742 public: 7743 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 7744 : CurDir(Dir), CGF(CGF) { 7745 // Extract firstprivate clause information. 7746 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 7747 for (const auto *D : C->varlists()) 7748 FirstPrivateDecls.insert( 7749 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); 7750 // Extract device pointer clause information. 7751 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 7752 for (auto L : C->component_lists()) 7753 DevPointersMap[L.first].push_back(L.second); 7754 } 7755 7756 /// Generate code for the combined entry if we have a partially mapped struct 7757 /// and take care of the mapping flags of the arguments corresponding to 7758 /// individual struct members. 
7759 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 7760 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7761 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 7762 const StructRangeInfoTy &PartialStruct) const { 7763 // Base is the base of the struct 7764 BasePointers.push_back(PartialStruct.Base.getPointer()); 7765 // Pointer is the address of the lowest element 7766 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 7767 Pointers.push_back(LB); 7768 // Size is (addr of {highest+1} element) - (addr of lowest element) 7769 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 7770 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 7771 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 7772 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 7773 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 7774 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.SizeTy, 7775 /*isSinged=*/false); 7776 Sizes.push_back(Size); 7777 // Map type is always TARGET_PARAM 7778 Types.push_back(OMP_MAP_TARGET_PARAM); 7779 // Remove TARGET_PARAM flag from the first element 7780 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 7781 7782 // All other current entries will be MEMBER_OF the combined entry 7783 // (except for PTR_AND_OBJ entries which do not have a placeholder value 7784 // 0xFFFF in the MEMBER_OF field). 7785 OpenMPOffloadMappingFlags MemberOfFlag = 7786 getMemberOfFlag(BasePointers.size() - 1); 7787 for (auto &M : CurTypes) 7788 setCorrectMemberOfFlag(M, MemberOfFlag); 7789 } 7790 7791 /// Generate all the base pointers, section pointers, sizes and map 7792 /// types for the extracted mappable expressions. Also, for each item that 7793 /// relates with a device pointer, a pair of the relevant declaration and 7794 /// index where it occurs is appended to the device pointers info array. 
7795 void generateAllInfo(MapBaseValuesArrayTy &BasePointers, 7796 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7797 MapFlagsArrayTy &Types) const { 7798 // We have to process the component lists that relate with the same 7799 // declaration in a single chunk so that we can generate the map flags 7800 // correctly. Therefore, we organize all lists in a map. 7801 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 7802 7803 // Helper function to fill the information map for the different supported 7804 // clauses. 7805 auto &&InfoGen = [&Info]( 7806 const ValueDecl *D, 7807 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 7808 OpenMPMapClauseKind MapType, 7809 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7810 bool ReturnDevicePointer, bool IsImplicit) { 7811 const ValueDecl *VD = 7812 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 7813 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 7814 IsImplicit); 7815 }; 7816 7817 // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 7818 for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) 7819 for (const auto &L : C->component_lists()) { 7820 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), 7821 /*ReturnDevicePointer=*/false, C->isImplicit()); 7822 } 7823 for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>()) 7824 for (const auto &L : C->component_lists()) { 7825 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, 7826 /*ReturnDevicePointer=*/false, C->isImplicit()); 7827 } 7828 for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>()) 7829 for (const auto &L : C->component_lists()) { 7830 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, 7831 /*ReturnDevicePointer=*/false, C->isImplicit()); 7832 } 7833 7834 // Look at the use_device_ptr clause information and mark the existing map 7835 // entries as such. 
If there is no map information for an entry in the 7836 // use_device_ptr list, we create one with map type 'alloc' and zero size 7837 // section. It is the user fault if that was not mapped before. If there is 7838 // no map information and the pointer is a struct member, then we defer the 7839 // emission of that entry until the whole struct has been processed. 7840 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> 7841 DeferredInfo; 7842 7843 // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 7844 for (const auto *C : 7845 this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) { 7846 for (const auto &L : C->component_lists()) { 7847 assert(!L.second.empty() && "Not expecting empty list of components!"); 7848 const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); 7849 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 7850 const Expr *IE = L.second.back().getAssociatedExpression(); 7851 // If the first component is a member expression, we have to look into 7852 // 'this', which maps to null in the map of map information. Otherwise 7853 // look directly for the information. 7854 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 7855 7856 // We potentially have map information for this declaration already. 7857 // Look for the first set of components that refer to it. 7858 if (It != Info.end()) { 7859 auto CI = std::find_if( 7860 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { 7861 return MI.Components.back().getAssociatedDeclaration() == VD; 7862 }); 7863 // If we found a map entry, signal that the pointer has to be returned 7864 // and move on to the next declaration. 7865 if (CI != It->second.end()) { 7866 CI->ReturnDevicePointer = true; 7867 continue; 7868 } 7869 } 7870 7871 // We didn't find any match in our map information - generate a zero 7872 // size array section - if the pointer is a struct member we defer this 7873 // action until the whole struct has been processed. 
7874 // FIXME: MSVC 2013 seems to require this-> to find member CGF. 7875 if (isa<MemberExpr>(IE)) { 7876 // Insert the pointer into Info to be processed by 7877 // generateInfoForComponentList. Because it is a member pointer 7878 // without a pointee, no entry will be generated for it, therefore 7879 // we need to generate one after the whole struct has been processed. 7880 // Nonetheless, generateInfoForComponentList must be called to take 7881 // the pointer into account for the calculation of the range of the 7882 // partial struct. 7883 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, 7884 /*ReturnDevicePointer=*/false, C->isImplicit()); 7885 DeferredInfo[nullptr].emplace_back(IE, VD); 7886 } else { 7887 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( 7888 this->CGF.EmitLValue(IE), IE->getExprLoc()); 7889 BasePointers.emplace_back(Ptr, VD); 7890 Pointers.push_back(Ptr); 7891 Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy)); 7892 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); 7893 } 7894 } 7895 } 7896 7897 for (const auto &M : Info) { 7898 // We need to know when we generate information for the first component 7899 // associated with a capture, because the mapping flags depend on it. 7900 bool IsFirstComponentList = true; 7901 7902 // Temporary versions of arrays 7903 MapBaseValuesArrayTy CurBasePointers; 7904 MapValuesArrayTy CurPointers; 7905 MapValuesArrayTy CurSizes; 7906 MapFlagsArrayTy CurTypes; 7907 StructRangeInfoTy PartialStruct; 7908 7909 for (const MapInfo &L : M.second) { 7910 assert(!L.Components.empty() && 7911 "Not expecting declaration with no component lists."); 7912 7913 // Remember the current base pointer index. 7914 unsigned CurrentBasePointersIdx = CurBasePointers.size(); 7915 // FIXME: MSVC 2013 seems to require this-> to find the member method. 
7916 this->generateInfoForComponentList( 7917 L.MapType, L.MapModifiers, L.Components, CurBasePointers, 7918 CurPointers, CurSizes, CurTypes, PartialStruct, 7919 IsFirstComponentList, L.IsImplicit); 7920 7921 // If this entry relates with a device pointer, set the relevant 7922 // declaration and add the 'return pointer' flag. 7923 if (L.ReturnDevicePointer) { 7924 assert(CurBasePointers.size() > CurrentBasePointersIdx && 7925 "Unexpected number of mapped base pointers."); 7926 7927 const ValueDecl *RelevantVD = 7928 L.Components.back().getAssociatedDeclaration(); 7929 assert(RelevantVD && 7930 "No relevant declaration related with device pointer??"); 7931 7932 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); 7933 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 7934 } 7935 IsFirstComponentList = false; 7936 } 7937 7938 // Append any pending zero-length pointers which are struct members and 7939 // used with use_device_ptr. 7940 auto CI = DeferredInfo.find(M.first); 7941 if (CI != DeferredInfo.end()) { 7942 for (const DeferredDevicePtrEntryTy &L : CI->second) { 7943 llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(); 7944 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( 7945 this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); 7946 CurBasePointers.emplace_back(BasePtr, L.VD); 7947 CurPointers.push_back(Ptr); 7948 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy)); 7949 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder 7950 // value MEMBER_OF=FFFF so that the entry is later updated with the 7951 // correct value of MEMBER_OF. 7952 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 7953 OMP_MAP_MEMBER_OF); 7954 } 7955 } 7956 7957 // If there is an entry in PartialStruct it means we have a struct with 7958 // individual members mapped. Emit an extra combined entry. 
if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }

  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// Does nothing unless the canonical, non-reference type of \p VD is a
  /// lambda closure class. Otherwise one entry is appended to each of
  /// \p BasePointers, \p Pointers, \p Sizes and \p Types for the captured
  /// 'this' (if the closure has one) and for every by-reference capture.
  /// Every appended entry carries the flags
  /// PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT.
  ///
  /// \param VD  Captured variable whose type is inspected.
  /// \param Arg Value used as the address of the closure object (combined
  ///            with the declared alignment of \p VD).
  /// \param LambdaPointers Receives, for every appended entry, a mapping from
  ///            the entry's base pointer to the closure object's pointer;
  ///            adjustMemberOfForLambdaCaptures() consumes this map.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    // Only lambda closure objects need the extra per-capture entries.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    // Collect the closure fields backing each capture (and 'this', if any).
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // NOTE(review): presumably the *ForFieldInitialization variant yields
      // the address of the field itself while EmitLValueForField looks through
      // a reference field - confirm against CodeGenFunction's documentation.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(ThisLVal.getPointer());
      Pointers.push_back(ThisLValVal.getPointer());
      // The 'this' entry is sized as a plain void pointer.
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      // By-copy captures get no entry here.
      if (LC.getCaptureKind() != LCK_ByRef)
        continue;
      // Note: this local intentionally shadows the \p VD parameter; from here
      // on VD names the variable captured by the lambda.
      const VarDecl *VD = LC.getCapturedVar();
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
      LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(VarLVal.getPointer());
      Pointers.push_back(VarLValVal.getPointer());
      // Sized as the captured variable's own (non-reference) type.
      Sizes.push_back(CGF.getTypeSize(
          VD->getType().getCanonicalType().getNonReferenceType()));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }

  /// Set correct indices for lambdas captures.
  ///
  /// Visits every entry whose flags are exactly
  /// PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT (the combination produced by
  /// generateInfoForLambdaCaptures()), finds the closest preceding entry whose
  /// pointer is the owning closure object recorded in \p LambdaPointers, and
  /// passes that entry's index to getMemberOfFlag() /
  /// setCorrectMemberOfFlag() to update the flags in \p Types.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      // Closure object that owns this capture entry.
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      // Walk backwards from the entry just before I to find the nearest
      // entry that maps the closure object itself.
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // Replace the MEMBER_OF placeholder of this entry with the index of the
      // parent entry found above.
      // NOTE(review): the helpers are defined outside this chunk; presumably
      // they encode TgtIdx into the MEMBER_OF bit-field - confirm.
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  ///
  /// \param Cap Capture to describe; must not be a variable-array-type
  ///            capture.
  /// \param Arg Value passed for the capture; used directly as base pointer
  ///            and pointer when the declaration is found in DevPointersMap.
  /// \param PartialStruct Forwarded to generateInfoForComponentList() for
  ///            every component list processed.
  ///
  /// Nothing is appended when no map clause of the current directive mentions
  /// the captured declaration and it is not found in DevPointersMap.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // Canonical declaration of the captured variable; null when the capture
    // is 'this'.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // (component list, map type, map modifiers, is-implicit) for every map
    // clause entry that refers to VD.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // Key: the list that is a prefix of the others (the "base" list);
    // value: the longer lists that extend it.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    // Count is the index of the first list after L, so that each unordered
    // pair of lists is compared exactly once.
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // Only Components1 is used below; the other tuple members are
        // overwritten as a side effect of the unpacking and not read again in
        // this loop.
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        // Compare both lists element-wise starting from the back.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The exhausted list is the base; the other one is recorded as the
          // element overlapping it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // NOTE(review): VD is null for a 'this' capture and is dereferenced
      // here without a check; presumably overlapping lists are not expected
      // in that case - confirm.
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Skip the common part of both lists (same walk as above).
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // First differing components: order by field position. Fields of
            // the same record compare by index; otherwise whichever of the
            // two appears first in Layout sorts first.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // NOTE(review): It is dereferenced unchecked; this assumes at
            // least one of the two fields is present in Layout - confirm.
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // We need to know when we are generating information for the first
    // component list associated with a capture, because the mapping flags
    // depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      // Every base list with overlapped elements is emitted as a "first"
      // component list.
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    // The flag starts out true only if nothing was emitted above, and is
    // cleared after the first iteration whether or not that list was emitted.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      // Base lists were already handled together with their overlaps.
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }

  /// Generate the base pointers, section pointers, sizes and map types
  /// associated with the declare target link variables.
  ///
  /// Scans every component list of every map clause on the current directive
  /// and emits map information only for lists whose declaration is a VarDecl
  /// marked as a 'declare target link' declaration.
  void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
                                        MapValuesArrayTy &Pointers,
                                        MapValuesArrayTy &Sizes,
                                        MapFlagsArrayTy &Types) const {
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->component_lists()) {
        // Lists without an associated declaration are skipped.
        if (!L.first)
          continue;
        const auto *VD = dyn_cast<VarDecl>(L.first);
        if (!VD)
          continue;
        llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
            OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
        if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
          continue;
        // Each list is emitted on its own with a fresh partial-struct record.
        StructRangeInfoTy PartialStruct;
        generateInfoForComponentList(
            C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
            Pointers, Sizes, Types, PartialStruct,
            /*IsFirstComponentList=*/true, C->isImplicit());
        assert(!PartialStruct.Base.isValid() &&
               "No partial structs for declare target link expected.");
      }
    }
  }

  /// Generate the default map information for a given capture \a CI,
/// record field declaration \a RI and captured value \a CV. 8253 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 8254 const FieldDecl &RI, llvm::Value *CV, 8255 MapBaseValuesArrayTy &CurBasePointers, 8256 MapValuesArrayTy &CurPointers, 8257 MapValuesArrayTy &CurSizes, 8258 MapFlagsArrayTy &CurMapTypes) const { 8259 // Do the default mapping. 8260 if (CI.capturesThis()) { 8261 CurBasePointers.push_back(CV); 8262 CurPointers.push_back(CV); 8263 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 8264 CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType())); 8265 // Default map type. 8266 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 8267 } else if (CI.capturesVariableByCopy()) { 8268 CurBasePointers.push_back(CV); 8269 CurPointers.push_back(CV); 8270 if (!RI.getType()->isAnyPointerType()) { 8271 // We have to signal to the runtime captures passed by value that are 8272 // not pointers. 8273 CurMapTypes.push_back(OMP_MAP_LITERAL); 8274 CurSizes.push_back(CGF.getTypeSize(RI.getType())); 8275 } else { 8276 // Pointers are implicitly mapped with a zero size and no flags 8277 // (other than first map that is added for all implicit maps). 8278 CurMapTypes.push_back(OMP_MAP_NONE); 8279 CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy)); 8280 } 8281 } else { 8282 assert(CI.capturesVariable() && "Expected captured reference."); 8283 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 8284 QualType ElementType = PtrTy->getPointeeType(); 8285 CurSizes.push_back(CGF.getTypeSize(ElementType)); 8286 // The default map type for a scalar/complex type is 'to' because by 8287 // default the value doesn't have to be retrieved. For an aggregate 8288 // type, the default is 'tofrom'. 
8289 CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); 8290 const VarDecl *VD = CI.getCapturedVar(); 8291 if (FirstPrivateDecls.count(VD) && 8292 VD->getType().isConstant(CGF.getContext())) { 8293 llvm::Constant *Addr = 8294 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 8295 // Copy the value of the original variable to the new global copy. 8296 CGF.Builder.CreateMemCpy( 8297 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(), 8298 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 8299 CurSizes.back(), /*isVolatile=*/false); 8300 // Use new global variable as the base pointers. 8301 CurBasePointers.push_back(Addr); 8302 CurPointers.push_back(Addr); 8303 } else { 8304 CurBasePointers.push_back(CV); 8305 if (FirstPrivateDecls.count(VD) && ElementType->isAnyPointerType()) { 8306 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 8307 CV, ElementType, CGF.getContext().getDeclAlign(VD), 8308 AlignmentSource::Decl)); 8309 CurPointers.push_back(PtrAddr.getPointer()); 8310 } else { 8311 CurPointers.push_back(CV); 8312 } 8313 } 8314 } 8315 // Every default map produces a single argument which is a target parameter. 8316 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; 8317 8318 // Add flag stating this is an implicit map. 8319 CurMapTypes.back() |= OMP_MAP_IMPLICIT; 8320 } 8321 }; 8322 8323 enum OpenMPOffloadingReservedDeviceIDs { 8324 /// Device ID if the device was not defined, runtime should get it 8325 /// from environment variables in the spec. 8326 OMP_DEVICEID_UNDEF = -1, 8327 }; 8328 } // anonymous namespace 8329 8330 /// Emit the arrays used to pass the captures and map information to the 8331 /// offloading runtime library. If there is no map or capture information, 8332 /// return nullptr by reference. 
8333 static void 8334 emitOffloadingArrays(CodeGenFunction &CGF, 8335 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 8336 MappableExprsHandler::MapValuesArrayTy &Pointers, 8337 MappableExprsHandler::MapValuesArrayTy &Sizes, 8338 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 8339 CGOpenMPRuntime::TargetDataInfo &Info) { 8340 CodeGenModule &CGM = CGF.CGM; 8341 ASTContext &Ctx = CGF.getContext(); 8342 8343 // Reset the array information. 8344 Info.clearArrayInfo(); 8345 Info.NumberOfPtrs = BasePointers.size(); 8346 8347 if (Info.NumberOfPtrs) { 8348 // Detect if we have any capture size requiring runtime evaluation of the 8349 // size so that a constant array could be eventually used. 8350 bool hasRuntimeEvaluationCaptureSize = false; 8351 for (llvm::Value *S : Sizes) 8352 if (!isa<llvm::Constant>(S)) { 8353 hasRuntimeEvaluationCaptureSize = true; 8354 break; 8355 } 8356 8357 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 8358 QualType PointerArrayType = 8359 Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, 8360 /*IndexTypeQuals=*/0); 8361 8362 Info.BasePointersArray = 8363 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 8364 Info.PointersArray = 8365 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 8366 8367 // If we don't have any VLA types or other types that require runtime 8368 // evaluation, we can use a constant array for the map sizes, otherwise we 8369 // need to fill up the arrays as we do for the pointers. 8370 if (hasRuntimeEvaluationCaptureSize) { 8371 QualType SizeArrayType = Ctx.getConstantArrayType( 8372 Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, 8373 /*IndexTypeQuals=*/0); 8374 Info.SizesArray = 8375 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 8376 } else { 8377 // We expect all the sizes to be constant, so we collect them to create 8378 // a constant array. 
8379 SmallVector<llvm::Constant *, 16> ConstSizes; 8380 for (llvm::Value *S : Sizes) 8381 ConstSizes.push_back(cast<llvm::Constant>(S)); 8382 8383 auto *SizesArrayInit = llvm::ConstantArray::get( 8384 llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); 8385 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 8386 auto *SizesArrayGbl = new llvm::GlobalVariable( 8387 CGM.getModule(), SizesArrayInit->getType(), 8388 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8389 SizesArrayInit, Name); 8390 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8391 Info.SizesArray = SizesArrayGbl; 8392 } 8393 8394 // The map types are always constant so we don't need to generate code to 8395 // fill arrays. Instead, we create an array constant. 8396 SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0); 8397 llvm::copy(MapTypes, Mapping.begin()); 8398 llvm::Constant *MapTypesArrayInit = 8399 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 8400 std::string MaptypesName = 8401 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 8402 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 8403 CGM.getModule(), MapTypesArrayInit->getType(), 8404 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8405 MapTypesArrayInit, MaptypesName); 8406 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8407 Info.MapTypesArray = MapTypesArrayGbl; 8408 8409 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 8410 llvm::Value *BPVal = *BasePointers[I]; 8411 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 8412 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8413 Info.BasePointersArray, 0, I); 8414 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8415 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8416 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8417 CGF.Builder.CreateStore(BPVal, BPAddr); 8418 8419 if (Info.requiresDevicePointerInfo()) 8420 if (const ValueDecl 
*DevVD = BasePointers[I].getDevicePtrDecl()) 8421 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 8422 8423 llvm::Value *PVal = Pointers[I]; 8424 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 8425 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8426 Info.PointersArray, 0, I); 8427 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8428 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8429 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8430 CGF.Builder.CreateStore(PVal, PAddr); 8431 8432 if (hasRuntimeEvaluationCaptureSize) { 8433 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 8434 llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), 8435 Info.SizesArray, 8436 /*Idx0=*/0, 8437 /*Idx1=*/I); 8438 Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); 8439 CGF.Builder.CreateStore( 8440 CGF.Builder.CreateIntCast(Sizes[I], CGM.SizeTy, /*isSigned=*/true), 8441 SAddr); 8442 } 8443 } 8444 } 8445 } 8446 /// Emit the arguments to be passed to the runtime library based on the 8447 /// arrays of pointers, sizes and map types. 
8448 static void emitOffloadingArraysArgument( 8449 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 8450 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 8451 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 8452 CodeGenModule &CGM = CGF.CGM; 8453 if (Info.NumberOfPtrs) { 8454 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8455 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8456 Info.BasePointersArray, 8457 /*Idx0=*/0, /*Idx1=*/0); 8458 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8459 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8460 Info.PointersArray, 8461 /*Idx0=*/0, 8462 /*Idx1=*/0); 8463 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8464 llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray, 8465 /*Idx0=*/0, /*Idx1=*/0); 8466 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8467 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8468 Info.MapTypesArray, 8469 /*Idx0=*/0, 8470 /*Idx1=*/0); 8471 } else { 8472 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8473 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8474 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); 8475 MapTypesArrayArg = 8476 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8477 } 8478 } 8479 8480 /// Check for inner distribute directive. 
///
/// Inspects the single child statement of the innermost captured statement of
/// \p D. For a 'target' directive the child is accepted if it is a
/// distribute-kind directive, or if it is a 'teams' directive whose own single
/// child is a distribute-kind directive. For 'target teams' the child itself
/// must be a distribute-kind directive.
///
/// \returns the nested distribute-kind directive, or nullptr if there is none.
///          Directive kinds other than the 'target' family handled below are
///          not expected and hit llvm_unreachable when a nested directive
///          exists.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // NOTE(review): helper defined elsewhere; presumably yields the only
  // relevant child statement of Body, or null - confirm.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        // Look one level deeper, through the nested 'teams' region.
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    // These combined 'target' forms yield no nested distribute directive.
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    // Every remaining directive kind is listed explicitly so the switch stays
    // exhaustive; none of them is expected here.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected directive.");
    }
  }

  // No nested executable directive at all.
  return nullptr;
}

/// Emit a call to the runtime entry identified by
/// OMPRTL__kmpc_push_target_tripcount, passing the device ID and the
/// iteration count of the loop associated with the target region.
///
/// \param D           Target directive being emitted. If it is not itself both
///                    a teams and a distribute directive, a nested distribute
///                    directive is searched for; nothing is emitted when none
///                    is found.
/// \param Device      Optional device expression; when null, the reserved
///                    OMP_DEVICEID_UNDEF value is passed instead.
/// \param SizeEmitter Callback that emits the number of iterations of the
///                    selected loop directive.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
    const llvm::function_ref<llvm::Value *(
        CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // The iteration count is emitted before the device ID.
    llvm::Value *NumIterations = SizeEmitter(CGF, *LD);

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);

    llvm::Value *Args[] = {DeviceID, NumIterations};
    CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Function *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask,
                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything.
It could be the pointer to the outlined function that 8637 // implements the target region, but we aren't using that so that the 8638 // compiler doesn't need to keep that, and could therefore inline the host 8639 // function if proven worthwhile during optimization. 8640 8641 // From this point on, we need to have an ID of the target region defined. 8642 assert(OutlinedFnID && "Invalid outlined function ID!"); 8643 8644 // Emit device ID if any. 8645 llvm::Value *DeviceID; 8646 if (Device) { 8647 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 8648 CGF.Int64Ty, /*isSigned=*/true); 8649 } else { 8650 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 8651 } 8652 8653 // Emit the number of elements in the offloading arrays. 8654 llvm::Value *PointerNum = 8655 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 8656 8657 // Return value of the runtime offloading call. 8658 llvm::Value *Return; 8659 8660 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 8661 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 8662 8663 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 8664 // The target region is an outlined function launched by the runtime 8665 // via calls __tgt_target() or __tgt_target_teams(). 8666 // 8667 // __tgt_target() launches a target region with one team and one thread, 8668 // executing a serial region. This master thread may in turn launch 8669 // more threads within its team upon encountering a parallel region, 8670 // however, no additional teams can be launched on the device. 8671 // 8672 // __tgt_target_teams() launches a target region with one or more teams, 8673 // each with one or more threads. This call is required for target 8674 // constructs such as: 8675 // 'target teams' 8676 // 'target' / 'teams' 8677 // 'target teams distribute parallel for' 8678 // 'target parallel' 8679 // and so on. 
8680 // 8681 // Note that on the host and CPU targets, the runtime implementation of 8682 // these calls simply call the outlined function without forking threads. 8683 // The outlined functions themselves have runtime calls to 8684 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 8685 // the compiler in emitTeamsCall() and emitParallelCall(). 8686 // 8687 // In contrast, on the NVPTX target, the implementation of 8688 // __tgt_target_teams() launches a GPU kernel with the requested number 8689 // of teams and threads so no additional calls to the runtime are required. 8690 if (NumTeams) { 8691 // If we have NumTeams defined this means that we have an enclosed teams 8692 // region. Therefore we also expect to have NumThreads defined. These two 8693 // values should be defined in the presence of a teams directive, 8694 // regardless of having any clauses associated. If the user is using teams 8695 // but no clauses, these two values will be the default that should be 8696 // passed to the runtime library - a 32-bit integer with the value zero. 8697 assert(NumThreads && "Thread limit expression should be available along " 8698 "with number of teams."); 8699 llvm::Value *OffloadingArgs[] = {DeviceID, 8700 OutlinedFnID, 8701 PointerNum, 8702 InputInfo.BasePointersArray.getPointer(), 8703 InputInfo.PointersArray.getPointer(), 8704 InputInfo.SizesArray.getPointer(), 8705 MapTypesArray, 8706 NumTeams, 8707 NumThreads}; 8708 Return = CGF.EmitRuntimeCall( 8709 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait 8710 : OMPRTL__tgt_target_teams), 8711 OffloadingArgs); 8712 } else { 8713 llvm::Value *OffloadingArgs[] = {DeviceID, 8714 OutlinedFnID, 8715 PointerNum, 8716 InputInfo.BasePointersArray.getPointer(), 8717 InputInfo.PointersArray.getPointer(), 8718 InputInfo.SizesArray.getPointer(), 8719 MapTypesArray}; 8720 Return = CGF.EmitRuntimeCall( 8721 createRuntimeFunction(HasNowait ? 
OMPRTL__tgt_target_nowait 8722 : OMPRTL__tgt_target), 8723 OffloadingArgs); 8724 } 8725 8726 // Check the error code and execute the host version if required. 8727 llvm::BasicBlock *OffloadFailedBlock = 8728 CGF.createBasicBlock("omp_offload.failed"); 8729 llvm::BasicBlock *OffloadContBlock = 8730 CGF.createBasicBlock("omp_offload.cont"); 8731 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 8732 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 8733 8734 CGF.EmitBlock(OffloadFailedBlock); 8735 if (RequiresOuterTask) { 8736 CapturedVars.clear(); 8737 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 8738 } 8739 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 8740 CGF.EmitBranch(OffloadContBlock); 8741 8742 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 8743 }; 8744 8745 // Notify that the host version must be executed. 8746 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 8747 RequiresOuterTask](CodeGenFunction &CGF, 8748 PrePostActionTy &) { 8749 if (RequiresOuterTask) { 8750 CapturedVars.clear(); 8751 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 8752 } 8753 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 8754 }; 8755 8756 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 8757 &CapturedVars, RequiresOuterTask, 8758 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 8759 // Fill up the arrays with all the captured variables. 8760 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 8761 MappableExprsHandler::MapValuesArrayTy Pointers; 8762 MappableExprsHandler::MapValuesArrayTy Sizes; 8763 MappableExprsHandler::MapFlagsArrayTy MapTypes; 8764 8765 // Get mappable expression information. 
8766 MappableExprsHandler MEHandler(D, CGF); 8767 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 8768 8769 auto RI = CS.getCapturedRecordDecl()->field_begin(); 8770 auto CV = CapturedVars.begin(); 8771 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 8772 CE = CS.capture_end(); 8773 CI != CE; ++CI, ++RI, ++CV) { 8774 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 8775 MappableExprsHandler::MapValuesArrayTy CurPointers; 8776 MappableExprsHandler::MapValuesArrayTy CurSizes; 8777 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 8778 MappableExprsHandler::StructRangeInfoTy PartialStruct; 8779 8780 // VLA sizes are passed to the outlined region by copy and do not have map 8781 // information associated. 8782 if (CI->capturesVariableArrayType()) { 8783 CurBasePointers.push_back(*CV); 8784 CurPointers.push_back(*CV); 8785 CurSizes.push_back(CGF.getTypeSize(RI->getType())); 8786 // Copy to the device as an argument. No need to retrieve it. 8787 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 8788 MappableExprsHandler::OMP_MAP_TARGET_PARAM); 8789 } else { 8790 // If we have any information in the map clause, we use it, otherwise we 8791 // just do a default mapping. 8792 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, 8793 CurSizes, CurMapTypes, PartialStruct); 8794 if (CurBasePointers.empty()) 8795 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, 8796 CurPointers, CurSizes, CurMapTypes); 8797 // Generate correct mapping for variables captured by reference in 8798 // lambdas. 8799 if (CI->capturesVariable()) 8800 MEHandler.generateInfoForLambdaCaptures( 8801 CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes, 8802 CurMapTypes, LambdaPointers); 8803 } 8804 // We expect to have at least an element of information for this capture. 
8805 assert(!CurBasePointers.empty() && 8806 "Non-existing map pointer for capture!"); 8807 assert(CurBasePointers.size() == CurPointers.size() && 8808 CurBasePointers.size() == CurSizes.size() && 8809 CurBasePointers.size() == CurMapTypes.size() && 8810 "Inconsistent map information sizes!"); 8811 8812 // If there is an entry in PartialStruct it means we have a struct with 8813 // individual members mapped. Emit an extra combined entry. 8814 if (PartialStruct.Base.isValid()) 8815 MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes, 8816 CurMapTypes, PartialStruct); 8817 8818 // We need to append the results of this capture to what we already have. 8819 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8820 Pointers.append(CurPointers.begin(), CurPointers.end()); 8821 Sizes.append(CurSizes.begin(), CurSizes.end()); 8822 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); 8823 } 8824 // Adjust MEMBER_OF flags for the lambdas captures. 8825 MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers, 8826 Pointers, MapTypes); 8827 // Map other list items in the map clause which are not captured variables 8828 // but "declare target link" global variables. 8829 MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, 8830 MapTypes); 8831 8832 TargetDataInfo Info; 8833 // Fill up the arrays and create the arguments. 
8834 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 8835 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 8836 Info.PointersArray, Info.SizesArray, 8837 Info.MapTypesArray, Info); 8838 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 8839 InputInfo.BasePointersArray = 8840 Address(Info.BasePointersArray, CGM.getPointerAlign()); 8841 InputInfo.PointersArray = 8842 Address(Info.PointersArray, CGM.getPointerAlign()); 8843 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 8844 MapTypesArray = Info.MapTypesArray; 8845 if (RequiresOuterTask) 8846 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 8847 else 8848 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 8849 }; 8850 8851 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 8852 CodeGenFunction &CGF, PrePostActionTy &) { 8853 if (RequiresOuterTask) { 8854 CodeGenFunction::OMPTargetDataInfo InputInfo; 8855 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 8856 } else { 8857 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 8858 } 8859 }; 8860 8861 // If we have a target function ID it means that we need to support 8862 // offloading, otherwise, just execute on the host. We need to execute on host 8863 // regardless of the conditional in the if clause if, e.g., the user do not 8864 // specify target triples. 8865 if (OutlinedFnID) { 8866 if (IfCond) { 8867 emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 8868 } else { 8869 RegionCodeGenTy ThenRCG(TargetThenGen); 8870 ThenRCG(CGF); 8871 } 8872 } else { 8873 RegionCodeGenTy ElseRCG(TargetElseGen); 8874 ElseRCG(CGF); 8875 } 8876 } 8877 8878 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 8879 StringRef ParentName) { 8880 if (!S) 8881 return; 8882 8883 // Codegen OMP target directives that offload compute to the device. 
8884 bool RequiresDeviceCodegen = 8885 isa<OMPExecutableDirective>(S) && 8886 isOpenMPTargetExecutionDirective( 8887 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 8888 8889 if (RequiresDeviceCodegen) { 8890 const auto &E = *cast<OMPExecutableDirective>(S); 8891 unsigned DeviceID; 8892 unsigned FileID; 8893 unsigned Line; 8894 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 8895 FileID, Line); 8896 8897 // Is this a target region that should not be emitted as an entry point? If 8898 // so just signal we are done with this target region. 8899 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 8900 ParentName, Line)) 8901 return; 8902 8903 switch (E.getDirectiveKind()) { 8904 case OMPD_target: 8905 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 8906 cast<OMPTargetDirective>(E)); 8907 break; 8908 case OMPD_target_parallel: 8909 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 8910 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 8911 break; 8912 case OMPD_target_teams: 8913 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 8914 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 8915 break; 8916 case OMPD_target_teams_distribute: 8917 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 8918 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 8919 break; 8920 case OMPD_target_teams_distribute_simd: 8921 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 8922 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 8923 break; 8924 case OMPD_target_parallel_for: 8925 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 8926 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 8927 break; 8928 case OMPD_target_parallel_for_simd: 8929 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 8930 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 8931 break; 8932 case OMPD_target_simd: 8933 
CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 8934 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 8935 break; 8936 case OMPD_target_teams_distribute_parallel_for: 8937 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 8938 CGM, ParentName, 8939 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 8940 break; 8941 case OMPD_target_teams_distribute_parallel_for_simd: 8942 CodeGenFunction:: 8943 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 8944 CGM, ParentName, 8945 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 8946 break; 8947 case OMPD_parallel: 8948 case OMPD_for: 8949 case OMPD_parallel_for: 8950 case OMPD_parallel_sections: 8951 case OMPD_for_simd: 8952 case OMPD_parallel_for_simd: 8953 case OMPD_cancel: 8954 case OMPD_cancellation_point: 8955 case OMPD_ordered: 8956 case OMPD_threadprivate: 8957 case OMPD_allocate: 8958 case OMPD_task: 8959 case OMPD_simd: 8960 case OMPD_sections: 8961 case OMPD_section: 8962 case OMPD_single: 8963 case OMPD_master: 8964 case OMPD_critical: 8965 case OMPD_taskyield: 8966 case OMPD_barrier: 8967 case OMPD_taskwait: 8968 case OMPD_taskgroup: 8969 case OMPD_atomic: 8970 case OMPD_flush: 8971 case OMPD_teams: 8972 case OMPD_target_data: 8973 case OMPD_target_exit_data: 8974 case OMPD_target_enter_data: 8975 case OMPD_distribute: 8976 case OMPD_distribute_simd: 8977 case OMPD_distribute_parallel_for: 8978 case OMPD_distribute_parallel_for_simd: 8979 case OMPD_teams_distribute: 8980 case OMPD_teams_distribute_simd: 8981 case OMPD_teams_distribute_parallel_for: 8982 case OMPD_teams_distribute_parallel_for_simd: 8983 case OMPD_target_update: 8984 case OMPD_declare_simd: 8985 case OMPD_declare_target: 8986 case OMPD_end_declare_target: 8987 case OMPD_declare_reduction: 8988 case OMPD_declare_mapper: 8989 case OMPD_taskloop: 8990 case OMPD_taskloop_simd: 8991 case OMPD_requires: 8992 case OMPD_unknown: 8993 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 
8994 } 8995 return; 8996 } 8997 8998 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 8999 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 9000 return; 9001 9002 scanForTargetRegionsFunctions( 9003 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 9004 return; 9005 } 9006 9007 // If this is a lambda function, look into its body. 9008 if (const auto *L = dyn_cast<LambdaExpr>(S)) 9009 S = L->getBody(); 9010 9011 // Keep looking for target regions recursively. 9012 for (const Stmt *II : S->children()) 9013 scanForTargetRegionsFunctions(II, ParentName); 9014 } 9015 9016 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 9017 // If emitting code for the host, we do not process FD here. Instead we do 9018 // the normal code generation. 9019 if (!CGM.getLangOpts().OpenMPIsDevice) 9020 return false; 9021 9022 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 9023 StringRef Name = CGM.getMangledName(GD); 9024 // Try to detect target regions in the function. 9025 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) 9026 scanForTargetRegionsFunctions(FD->getBody(), Name); 9027 9028 // Do not to emit function if it is not marked as declare target. 9029 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 9030 AlreadyEmittedTargetFunctions.count(Name) == 0; 9031 } 9032 9033 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 9034 if (!CGM.getLangOpts().OpenMPIsDevice) 9035 return false; 9036 9037 // Check if there are Ctors/Dtors in this declaration and look for target 9038 // regions in it. We use the complete variant to produce the kernel name 9039 // mangling. 
9040 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 9041 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 9042 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 9043 StringRef ParentName = 9044 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 9045 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 9046 } 9047 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 9048 StringRef ParentName = 9049 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 9050 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 9051 } 9052 } 9053 9054 // Do not to emit variable if it is not marked as declare target. 9055 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9056 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 9057 cast<VarDecl>(GD.getDecl())); 9058 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) { 9059 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 9060 return true; 9061 } 9062 return false; 9063 } 9064 9065 llvm::Constant * 9066 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 9067 const VarDecl *VD) { 9068 assert(VD->getType().isConstant(CGM.getContext()) && 9069 "Expected constant variable."); 9070 StringRef VarName; 9071 llvm::Constant *Addr; 9072 llvm::GlobalValue::LinkageTypes Linkage; 9073 QualType Ty = VD->getType(); 9074 SmallString<128> Buffer; 9075 { 9076 unsigned DeviceID; 9077 unsigned FileID; 9078 unsigned Line; 9079 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 9080 FileID, Line); 9081 llvm::raw_svector_ostream OS(Buffer); 9082 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 9083 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 9084 VarName = OS.str(); 9085 } 9086 Linkage = llvm::GlobalValue::InternalLinkage; 9087 Addr = 9088 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 9089 getDefaultFirstprivateAddressSpace()); 9090 
cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 9091 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 9092 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 9093 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9094 VarName, Addr, VarSize, 9095 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 9096 return Addr; 9097 } 9098 9099 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 9100 llvm::Constant *Addr) { 9101 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9102 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9103 if (!Res) { 9104 if (CGM.getLangOpts().OpenMPIsDevice) { 9105 // Register non-target variables being emitted in device code (debug info 9106 // may cause this). 9107 StringRef VarName = CGM.getMangledName(VD); 9108 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9109 } 9110 return; 9111 } 9112 // Register declare target variables. 9113 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 9114 StringRef VarName; 9115 CharUnits VarSize; 9116 llvm::GlobalValue::LinkageTypes Linkage; 9117 switch (*Res) { 9118 case OMPDeclareTargetDeclAttr::MT_To: 9119 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9120 VarName = CGM.getMangledName(VD); 9121 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 9122 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 9123 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 9124 } else { 9125 VarSize = CharUnits::Zero(); 9126 } 9127 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 9128 // Temp solution to prevent optimizations of the internal variables. 
9129 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 9130 std::string RefName = getName({VarName, "ref"}); 9131 if (!CGM.GetGlobalValue(RefName)) { 9132 llvm::Constant *AddrRef = 9133 getOrCreateInternalVariable(Addr->getType(), RefName); 9134 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 9135 GVAddrRef->setConstant(/*Val=*/true); 9136 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 9137 GVAddrRef->setInitializer(Addr); 9138 CGM.addCompilerUsedGlobal(GVAddrRef); 9139 } 9140 } 9141 break; 9142 case OMPDeclareTargetDeclAttr::MT_Link: 9143 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 9144 if (CGM.getLangOpts().OpenMPIsDevice) { 9145 VarName = Addr->getName(); 9146 Addr = nullptr; 9147 } else { 9148 VarName = getAddrOfDeclareTargetLink(VD).getName(); 9149 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer()); 9150 } 9151 VarSize = CGM.getPointerSize(); 9152 Linkage = llvm::GlobalValue::WeakAnyLinkage; 9153 break; 9154 } 9155 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9156 VarName, Addr, VarSize, Flags, Linkage); 9157 } 9158 9159 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 9160 if (isa<FunctionDecl>(GD.getDecl()) || 9161 isa<OMPDeclareReductionDecl>(GD.getDecl())) 9162 return emitTargetFunctions(GD); 9163 9164 return emitTargetGlobalVariable(GD); 9165 } 9166 9167 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 9168 for (const VarDecl *VD : DeferredGlobalVariables) { 9169 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9170 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9171 if (!Res) 9172 continue; 9173 if (*Res == OMPDeclareTargetDeclAttr::MT_To) { 9174 CGM.EmitGlobal(VD); 9175 } else { 9176 assert(*Res == OMPDeclareTargetDeclAttr::MT_Link && 9177 "Expected to or link clauses."); 9178 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); 9179 } 9180 } 9181 } 9182 9183 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 
9184 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 9185 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 9186 " Expected target-based directive."); 9187 } 9188 9189 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 9190 LangAS &AS) { 9191 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 9192 return false; 9193 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 9194 switch(A->getAllocatorType()) { 9195 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 9196 // Not supported, fallback to the default mem space. 9197 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 9198 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 9199 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 9200 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 9201 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 9202 case OMPAllocateDeclAttr::OMPConstMemAlloc: 9203 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 9204 AS = LangAS::Default; 9205 return true; 9206 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 9207 llvm_unreachable("Expected predefined allocator for the variables with the " 9208 "static storage."); 9209 } 9210 return false; 9211 } 9212 9213 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 9214 CodeGenModule &CGM) 9215 : CGM(CGM) { 9216 if (CGM.getLangOpts().OpenMPIsDevice) { 9217 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 9218 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 9219 } 9220 } 9221 9222 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 9223 if (CGM.getLangOpts().OpenMPIsDevice) 9224 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 9225 } 9226 9227 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 9228 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 9229 return true; 9230 9231 StringRef Name = CGM.getMangledName(GD); 9232 const auto *D = cast<FunctionDecl>(GD.getDecl()); 9233 // Do not to emit function if it is marked as 
declare target as it was already 9234 // emitted. 9235 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 9236 if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) { 9237 if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name))) 9238 return !F->isDeclaration(); 9239 return false; 9240 } 9241 return true; 9242 } 9243 9244 return !AlreadyEmittedTargetFunctions.insert(Name).second; 9245 } 9246 9247 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { 9248 // If we have offloading in the current module, we need to emit the entries 9249 // now and register the offloading descriptor. 9250 createOffloadEntriesAndInfoMetadata(); 9251 9252 // Create and register the offloading binary descriptors. This is the main 9253 // entity that captures all the information about offloading in the current 9254 // compilation unit. 9255 return createOffloadingBinaryDescriptorRegistration(); 9256 } 9257 9258 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 9259 const OMPExecutableDirective &D, 9260 SourceLocation Loc, 9261 llvm::Function *OutlinedFn, 9262 ArrayRef<llvm::Value *> CapturedVars) { 9263 if (!CGF.HaveInsertPoint()) 9264 return; 9265 9266 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 9267 CodeGenFunction::RunCleanupsScope Scope(CGF); 9268 9269 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 9270 llvm::Value *Args[] = { 9271 RTLoc, 9272 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 9273 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 9274 llvm::SmallVector<llvm::Value *, 16> RealArgs; 9275 RealArgs.append(std::begin(Args), std::end(Args)); 9276 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 9277 9278 llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 9279 CGF.EmitRuntimeCall(RTLFn, RealArgs); 9280 } 9281 9282 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 9283 const Expr *NumTeams, 9284 const Expr 
*ThreadLimit, 9285 SourceLocation Loc) { 9286 if (!CGF.HaveInsertPoint()) 9287 return; 9288 9289 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 9290 9291 llvm::Value *NumTeamsVal = 9292 NumTeams 9293 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 9294 CGF.CGM.Int32Ty, /* isSigned = */ true) 9295 : CGF.Builder.getInt32(0); 9296 9297 llvm::Value *ThreadLimitVal = 9298 ThreadLimit 9299 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 9300 CGF.CGM.Int32Ty, /* isSigned = */ true) 9301 : CGF.Builder.getInt32(0); 9302 9303 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 9304 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 9305 ThreadLimitVal}; 9306 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 9307 PushNumTeamsArgs); 9308 } 9309 9310 void CGOpenMPRuntime::emitTargetDataCalls( 9311 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 9312 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 9313 if (!CGF.HaveInsertPoint()) 9314 return; 9315 9316 // Action used to replace the default codegen action and turn privatization 9317 // off. 9318 PrePostActionTy NoPrivAction; 9319 9320 // Generate the code for the opening of the data environment. Capture all the 9321 // arguments of the runtime call by reference because they are used in the 9322 // closing of the region. 9323 auto &&BeginThenGen = [this, &D, Device, &Info, 9324 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 9325 // Fill up the arrays with all the mapped variables. 9326 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9327 MappableExprsHandler::MapValuesArrayTy Pointers; 9328 MappableExprsHandler::MapValuesArrayTy Sizes; 9329 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9330 9331 // Get map clause information. 
9332 MappableExprsHandler MCHandler(D, CGF); 9333 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 9334 9335 // Fill up the arrays and create the arguments. 9336 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 9337 9338 llvm::Value *BasePointersArrayArg = nullptr; 9339 llvm::Value *PointersArrayArg = nullptr; 9340 llvm::Value *SizesArrayArg = nullptr; 9341 llvm::Value *MapTypesArrayArg = nullptr; 9342 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 9343 SizesArrayArg, MapTypesArrayArg, Info); 9344 9345 // Emit device ID if any. 9346 llvm::Value *DeviceID = nullptr; 9347 if (Device) { 9348 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9349 CGF.Int64Ty, /*isSigned=*/true); 9350 } else { 9351 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9352 } 9353 9354 // Emit the number of elements in the offloading arrays. 9355 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 9356 9357 llvm::Value *OffloadingArgs[] = { 9358 DeviceID, PointerNum, BasePointersArrayArg, 9359 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 9360 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), 9361 OffloadingArgs); 9362 9363 // If device pointer privatization is required, emit the body of the region 9364 // here. It will have to be duplicated: with and without privatization. 9365 if (!Info.CaptureDeviceAddrMap.empty()) 9366 CodeGen(CGF); 9367 }; 9368 9369 // Generate code for the closing of the data region. 
9370 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 9371 PrePostActionTy &) { 9372 assert(Info.isValid() && "Invalid data environment closing arguments."); 9373 9374 llvm::Value *BasePointersArrayArg = nullptr; 9375 llvm::Value *PointersArrayArg = nullptr; 9376 llvm::Value *SizesArrayArg = nullptr; 9377 llvm::Value *MapTypesArrayArg = nullptr; 9378 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 9379 SizesArrayArg, MapTypesArrayArg, Info); 9380 9381 // Emit device ID if any. 9382 llvm::Value *DeviceID = nullptr; 9383 if (Device) { 9384 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9385 CGF.Int64Ty, /*isSigned=*/true); 9386 } else { 9387 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9388 } 9389 9390 // Emit the number of elements in the offloading arrays. 9391 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 9392 9393 llvm::Value *OffloadingArgs[] = { 9394 DeviceID, PointerNum, BasePointersArrayArg, 9395 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 9396 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), 9397 OffloadingArgs); 9398 }; 9399 9400 // If we need device pointer privatization, we need to emit the body of the 9401 // region with no privatization in the 'else' branch of the conditional. 9402 // Otherwise, we don't have to do anything. 9403 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 9404 PrePostActionTy &) { 9405 if (!Info.CaptureDeviceAddrMap.empty()) { 9406 CodeGen.setAction(NoPrivAction); 9407 CodeGen(CGF); 9408 } 9409 }; 9410 9411 // We don't have to do anything to close the region if the if clause evaluates 9412 // to false. 
9413 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 9414 9415 if (IfCond) { 9416 emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 9417 } else { 9418 RegionCodeGenTy RCG(BeginThenGen); 9419 RCG(CGF); 9420 } 9421 9422 // If we don't require privatization of device pointers, we emit the body in 9423 // between the runtime calls. This avoids duplicating the body code. 9424 if (Info.CaptureDeviceAddrMap.empty()) { 9425 CodeGen.setAction(NoPrivAction); 9426 CodeGen(CGF); 9427 } 9428 9429 if (IfCond) { 9430 emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen); 9431 } else { 9432 RegionCodeGenTy RCG(EndThenGen); 9433 RCG(CGF); 9434 } 9435 } 9436 9437 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 9438 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 9439 const Expr *Device) { 9440 if (!CGF.HaveInsertPoint()) 9441 return; 9442 9443 assert((isa<OMPTargetEnterDataDirective>(D) || 9444 isa<OMPTargetExitDataDirective>(D) || 9445 isa<OMPTargetUpdateDirective>(D)) && 9446 "Expecting either target enter, exit data, or update directives."); 9447 9448 CodeGenFunction::OMPTargetDataInfo InputInfo; 9449 llvm::Value *MapTypesArray = nullptr; 9450 // Generate the code for the opening of the data environment. 9451 auto &&ThenGen = [this, &D, Device, &InputInfo, 9452 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 9453 // Emit device ID if any. 9454 llvm::Value *DeviceID = nullptr; 9455 if (Device) { 9456 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9457 CGF.Int64Ty, /*isSigned=*/true); 9458 } else { 9459 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9460 } 9461 9462 // Emit the number of elements in the offloading arrays. 
9463 llvm::Constant *PointerNum = 9464 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 9465 9466 llvm::Value *OffloadingArgs[] = {DeviceID, 9467 PointerNum, 9468 InputInfo.BasePointersArray.getPointer(), 9469 InputInfo.PointersArray.getPointer(), 9470 InputInfo.SizesArray.getPointer(), 9471 MapTypesArray}; 9472 9473 // Select the right runtime function call for each expected standalone 9474 // directive. 9475 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 9476 OpenMPRTLFunction RTLFn; 9477 switch (D.getDirectiveKind()) { 9478 case OMPD_target_enter_data: 9479 RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait 9480 : OMPRTL__tgt_target_data_begin; 9481 break; 9482 case OMPD_target_exit_data: 9483 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait 9484 : OMPRTL__tgt_target_data_end; 9485 break; 9486 case OMPD_target_update: 9487 RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait 9488 : OMPRTL__tgt_target_data_update; 9489 break; 9490 case OMPD_parallel: 9491 case OMPD_for: 9492 case OMPD_parallel_for: 9493 case OMPD_parallel_sections: 9494 case OMPD_for_simd: 9495 case OMPD_parallel_for_simd: 9496 case OMPD_cancel: 9497 case OMPD_cancellation_point: 9498 case OMPD_ordered: 9499 case OMPD_threadprivate: 9500 case OMPD_allocate: 9501 case OMPD_task: 9502 case OMPD_simd: 9503 case OMPD_sections: 9504 case OMPD_section: 9505 case OMPD_single: 9506 case OMPD_master: 9507 case OMPD_critical: 9508 case OMPD_taskyield: 9509 case OMPD_barrier: 9510 case OMPD_taskwait: 9511 case OMPD_taskgroup: 9512 case OMPD_atomic: 9513 case OMPD_flush: 9514 case OMPD_teams: 9515 case OMPD_target_data: 9516 case OMPD_distribute: 9517 case OMPD_distribute_simd: 9518 case OMPD_distribute_parallel_for: 9519 case OMPD_distribute_parallel_for_simd: 9520 case OMPD_teams_distribute: 9521 case OMPD_teams_distribute_simd: 9522 case OMPD_teams_distribute_parallel_for: 9523 case OMPD_teams_distribute_parallel_for_simd: 9524 case OMPD_declare_simd: 9525 case 
OMPD_declare_target: 9526 case OMPD_end_declare_target: 9527 case OMPD_declare_reduction: 9528 case OMPD_declare_mapper: 9529 case OMPD_taskloop: 9530 case OMPD_taskloop_simd: 9531 case OMPD_target: 9532 case OMPD_target_simd: 9533 case OMPD_target_teams_distribute: 9534 case OMPD_target_teams_distribute_simd: 9535 case OMPD_target_teams_distribute_parallel_for: 9536 case OMPD_target_teams_distribute_parallel_for_simd: 9537 case OMPD_target_teams: 9538 case OMPD_target_parallel: 9539 case OMPD_target_parallel_for: 9540 case OMPD_target_parallel_for_simd: 9541 case OMPD_requires: 9542 case OMPD_unknown: 9543 llvm_unreachable("Unexpected standalone target data directive."); 9544 break; 9545 } 9546 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); 9547 }; 9548 9549 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 9550 CodeGenFunction &CGF, PrePostActionTy &) { 9551 // Fill up the arrays with all the mapped variables. 9552 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9553 MappableExprsHandler::MapValuesArrayTy Pointers; 9554 MappableExprsHandler::MapValuesArrayTy Sizes; 9555 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9556 9557 // Get map clause information. 9558 MappableExprsHandler MEHandler(D, CGF); 9559 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 9560 9561 TargetDataInfo Info; 9562 // Fill up the arrays and create the arguments. 
9563 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 9564 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 9565 Info.PointersArray, Info.SizesArray, 9566 Info.MapTypesArray, Info); 9567 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 9568 InputInfo.BasePointersArray = 9569 Address(Info.BasePointersArray, CGM.getPointerAlign()); 9570 InputInfo.PointersArray = 9571 Address(Info.PointersArray, CGM.getPointerAlign()); 9572 InputInfo.SizesArray = 9573 Address(Info.SizesArray, CGM.getPointerAlign()); 9574 MapTypesArray = Info.MapTypesArray; 9575 if (D.hasClausesOfKind<OMPDependClause>()) 9576 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 9577 else 9578 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 9579 }; 9580 9581 if (IfCond) { 9582 emitOMPIfClause(CGF, IfCond, TargetThenGen, 9583 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 9584 } else { 9585 RegionCodeGenTy ThenRCG(TargetThenGen); 9586 ThenRCG(CGF); 9587 } 9588 } 9589 9590 namespace { 9591 /// Kind of parameter in a function with 'declare simd' directive. 9592 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 9593 /// Attribute set of the parameter. 9594 struct ParamAttrTy { 9595 ParamKindTy Kind = Vector; 9596 llvm::APSInt StrideOrArg; 9597 llvm::APSInt Alignment; 9598 }; 9599 } // namespace 9600 9601 static unsigned evaluateCDTSize(const FunctionDecl *FD, 9602 ArrayRef<ParamAttrTy> ParamAttrs) { 9603 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 9604 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 9605 // of that clause. The VLEN value must be power of 2. 9606 // In other case the notion of the function`s "characteristic data type" (CDT) 9607 // is used to compute the vector length. 9608 // CDT is defined in the following order: 9609 // a) For non-void function, the CDT is the return type. 
9610 // b) If the function has any non-uniform, non-linear parameters, then the 9611 // CDT is the type of the first such parameter. 9612 // c) If the CDT determined by a) or b) above is struct, union, or class 9613 // type which is pass-by-value (except for the type that maps to the 9614 // built-in complex data type), the characteristic data type is int. 9615 // d) If none of the above three cases is applicable, the CDT is int. 9616 // The VLEN is then determined based on the CDT and the size of vector 9617 // register of that ISA for which current vector version is generated. The 9618 // VLEN is computed using the formula below: 9619 // VLEN = sizeof(vector_register) / sizeof(CDT), 9620 // where vector register size specified in section 3.2.1 Registers and the 9621 // Stack Frame of original AMD64 ABI document. 9622 QualType RetType = FD->getReturnType(); 9623 if (RetType.isNull()) 9624 return 0; 9625 ASTContext &C = FD->getASTContext(); 9626 QualType CDT; 9627 if (!RetType.isNull() && !RetType->isVoidType()) { 9628 CDT = RetType; 9629 } else { 9630 unsigned Offset = 0; 9631 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 9632 if (ParamAttrs[Offset].Kind == Vector) 9633 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 9634 ++Offset; 9635 } 9636 if (CDT.isNull()) { 9637 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 9638 if (ParamAttrs[I + Offset].Kind == Vector) { 9639 CDT = FD->getParamDecl(I)->getType(); 9640 break; 9641 } 9642 } 9643 } 9644 } 9645 if (CDT.isNull()) 9646 CDT = C.IntTy; 9647 CDT = CDT->getCanonicalTypeUnqualified(); 9648 if (CDT->isRecordType() || CDT->isUnionType()) 9649 CDT = C.IntTy; 9650 return C.getTypeSize(CDT); 9651 } 9652 9653 static void 9654 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 9655 const llvm::APSInt &VLENVal, 9656 ArrayRef<ParamAttrTy> ParamAttrs, 9657 OMPDeclareSimdDeclAttr::BranchStateTy State) { 9658 struct ISADataTy { 9659 char ISA; 9660 unsigned VecRegSize; 9661 }; 9662 
ISADataTy ISAData[] = { 9663 { 9664 'b', 128 9665 }, // SSE 9666 { 9667 'c', 256 9668 }, // AVX 9669 { 9670 'd', 256 9671 }, // AVX2 9672 { 9673 'e', 512 9674 }, // AVX512 9675 }; 9676 llvm::SmallVector<char, 2> Masked; 9677 switch (State) { 9678 case OMPDeclareSimdDeclAttr::BS_Undefined: 9679 Masked.push_back('N'); 9680 Masked.push_back('M'); 9681 break; 9682 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 9683 Masked.push_back('N'); 9684 break; 9685 case OMPDeclareSimdDeclAttr::BS_Inbranch: 9686 Masked.push_back('M'); 9687 break; 9688 } 9689 for (char Mask : Masked) { 9690 for (const ISADataTy &Data : ISAData) { 9691 SmallString<256> Buffer; 9692 llvm::raw_svector_ostream Out(Buffer); 9693 Out << "_ZGV" << Data.ISA << Mask; 9694 if (!VLENVal) { 9695 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / 9696 evaluateCDTSize(FD, ParamAttrs)); 9697 } else { 9698 Out << VLENVal; 9699 } 9700 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 9701 switch (ParamAttr.Kind){ 9702 case LinearWithVarStride: 9703 Out << 's' << ParamAttr.StrideOrArg; 9704 break; 9705 case Linear: 9706 Out << 'l'; 9707 if (!!ParamAttr.StrideOrArg) 9708 Out << ParamAttr.StrideOrArg; 9709 break; 9710 case Uniform: 9711 Out << 'u'; 9712 break; 9713 case Vector: 9714 Out << 'v'; 9715 break; 9716 } 9717 if (!!ParamAttr.Alignment) 9718 Out << 'a' << ParamAttr.Alignment; 9719 } 9720 Out << '_' << Fn->getName(); 9721 Fn->addFnAttr(Out.str()); 9722 } 9723 } 9724 } 9725 9726 // This are the Functions that are needed to mangle the name of the 9727 // vector functions generated by the compiler, according to the rules 9728 // defined in the "Vector Function ABI specifications for AArch64", 9729 // available at 9730 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 9731 9732 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 
9733 /// 9734 /// TODO: Need to implement the behavior for reference marked with a 9735 /// var or no linear modifiers (1.b in the section). For this, we 9736 /// need to extend ParamKindTy to support the linear modifiers. 9737 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 9738 QT = QT.getCanonicalType(); 9739 9740 if (QT->isVoidType()) 9741 return false; 9742 9743 if (Kind == ParamKindTy::Uniform) 9744 return false; 9745 9746 if (Kind == ParamKindTy::Linear) 9747 return false; 9748 9749 // TODO: Handle linear references with modifiers 9750 9751 if (Kind == ParamKindTy::LinearWithVarStride) 9752 return false; 9753 9754 return true; 9755 } 9756 9757 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 9758 static bool getAArch64PBV(QualType QT, ASTContext &C) { 9759 QT = QT.getCanonicalType(); 9760 unsigned Size = C.getTypeSize(QT); 9761 9762 // Only scalars and complex within 16 bytes wide set PVB to true. 9763 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 9764 return false; 9765 9766 if (QT->isFloatingType()) 9767 return true; 9768 9769 if (QT->isIntegerType()) 9770 return true; 9771 9772 if (QT->isPointerType()) 9773 return true; 9774 9775 // TODO: Add support for complex types (section 3.1.2, item 2). 9776 9777 return false; 9778 } 9779 9780 /// Computes the lane size (LS) of a return type or of an input parameter, 9781 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 9782 /// TODO: Add support for references, section 3.2.1, item 1. 
9783 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 9784 if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 9785 QualType PTy = QT.getCanonicalType()->getPointeeType(); 9786 if (getAArch64PBV(PTy, C)) 9787 return C.getTypeSize(PTy); 9788 } 9789 if (getAArch64PBV(QT, C)) 9790 return C.getTypeSize(QT); 9791 9792 return C.getTypeSize(C.getUIntPtrType()); 9793 } 9794 9795 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 9796 // signature of the scalar function, as defined in 3.2.2 of the 9797 // AAVFABI. 9798 static std::tuple<unsigned, unsigned, bool> 9799 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 9800 QualType RetType = FD->getReturnType().getCanonicalType(); 9801 9802 ASTContext &C = FD->getASTContext(); 9803 9804 bool OutputBecomesInput = false; 9805 9806 llvm::SmallVector<unsigned, 8> Sizes; 9807 if (!RetType->isVoidType()) { 9808 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 9809 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 9810 OutputBecomesInput = true; 9811 } 9812 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 9813 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 9814 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 9815 } 9816 9817 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 9818 // The LS of a function parameter / return value can only be a power 9819 // of 2, starting from 8 bits, up to 128. 
9820 assert(std::all_of(Sizes.begin(), Sizes.end(), 9821 [](unsigned Size) { 9822 return Size == 8 || Size == 16 || Size == 32 || 9823 Size == 64 || Size == 128; 9824 }) && 9825 "Invalid size"); 9826 9827 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 9828 *std::max_element(std::begin(Sizes), std::end(Sizes)), 9829 OutputBecomesInput); 9830 } 9831 9832 /// Mangle the parameter part of the vector function name according to 9833 /// their OpenMP classification. The mangling function is defined in 9834 /// section 3.5 of the AAVFABI. 9835 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 9836 SmallString<256> Buffer; 9837 llvm::raw_svector_ostream Out(Buffer); 9838 for (const auto &ParamAttr : ParamAttrs) { 9839 switch (ParamAttr.Kind) { 9840 case LinearWithVarStride: 9841 Out << "ls" << ParamAttr.StrideOrArg; 9842 break; 9843 case Linear: 9844 Out << 'l'; 9845 // Don't print the step value if it is not present or if it is 9846 // equal to 1. 9847 if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1) 9848 Out << ParamAttr.StrideOrArg; 9849 break; 9850 case Uniform: 9851 Out << 'u'; 9852 break; 9853 case Vector: 9854 Out << 'v'; 9855 break; 9856 } 9857 9858 if (!!ParamAttr.Alignment) 9859 Out << 'a' << ParamAttr.Alignment; 9860 } 9861 9862 return Out.str(); 9863 } 9864 9865 // Function used to add the attribute. The parameter `VLEN` is 9866 // templated to allow the use of "x" when targeting scalable functions 9867 // for SVE. 
9868 template <typename T> 9869 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 9870 char ISA, StringRef ParSeq, 9871 StringRef MangledName, bool OutputBecomesInput, 9872 llvm::Function *Fn) { 9873 SmallString<256> Buffer; 9874 llvm::raw_svector_ostream Out(Buffer); 9875 Out << Prefix << ISA << LMask << VLEN; 9876 if (OutputBecomesInput) 9877 Out << "v"; 9878 Out << ParSeq << "_" << MangledName; 9879 Fn->addFnAttr(Out.str()); 9880 } 9881 9882 // Helper function to generate the Advanced SIMD names depending on 9883 // the value of the NDS when simdlen is not present. 9884 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 9885 StringRef Prefix, char ISA, 9886 StringRef ParSeq, StringRef MangledName, 9887 bool OutputBecomesInput, 9888 llvm::Function *Fn) { 9889 switch (NDS) { 9890 case 8: 9891 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 9892 OutputBecomesInput, Fn); 9893 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 9894 OutputBecomesInput, Fn); 9895 break; 9896 case 16: 9897 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 9898 OutputBecomesInput, Fn); 9899 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 9900 OutputBecomesInput, Fn); 9901 break; 9902 case 32: 9903 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 9904 OutputBecomesInput, Fn); 9905 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 9906 OutputBecomesInput, Fn); 9907 break; 9908 case 64: 9909 case 128: 9910 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 9911 OutputBecomesInput, Fn); 9912 break; 9913 default: 9914 llvm_unreachable("Scalar type is too wide."); 9915 } 9916 } 9917 9918 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
9919 static void emitAArch64DeclareSimdFunction( 9920 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 9921 ArrayRef<ParamAttrTy> ParamAttrs, 9922 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 9923 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 9924 9925 // Get basic data for building the vector signature. 9926 const auto Data = getNDSWDS(FD, ParamAttrs); 9927 const unsigned NDS = std::get<0>(Data); 9928 const unsigned WDS = std::get<1>(Data); 9929 const bool OutputBecomesInput = std::get<2>(Data); 9930 9931 // Check the values provided via `simdlen` by the user. 9932 // 1. A `simdlen(1)` doesn't produce vector signatures, 9933 if (UserVLEN == 1) { 9934 unsigned DiagID = CGM.getDiags().getCustomDiagID( 9935 DiagnosticsEngine::Warning, 9936 "The clause simdlen(1) has no effect when targeting aarch64."); 9937 CGM.getDiags().Report(SLoc, DiagID); 9938 return; 9939 } 9940 9941 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 9942 // Advanced SIMD output. 9943 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 9944 unsigned DiagID = CGM.getDiags().getCustomDiagID( 9945 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 9946 "power of 2 when targeting Advanced SIMD."); 9947 CGM.getDiags().Report(SLoc, DiagID); 9948 return; 9949 } 9950 9951 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 9952 // limits. 9953 if (ISA == 's' && UserVLEN != 0) { 9954 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 9955 unsigned DiagID = CGM.getDiags().getCustomDiagID( 9956 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 9957 "lanes in the architectural constraints " 9958 "for SVE (min is 128-bit, max is " 9959 "2048-bit, by steps of 128-bit)"); 9960 CGM.getDiags().Report(SLoc, DiagID) << WDS; 9961 return; 9962 } 9963 } 9964 9965 // Sort out parameter sequence. 
9966 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 9967 StringRef Prefix = "_ZGV"; 9968 // Generate simdlen from user input (if any). 9969 if (UserVLEN) { 9970 if (ISA == 's') { 9971 // SVE generates only a masked function. 9972 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 9973 OutputBecomesInput, Fn); 9974 } else { 9975 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 9976 // Advanced SIMD generates one or two functions, depending on 9977 // the `[not]inbranch` clause. 9978 switch (State) { 9979 case OMPDeclareSimdDeclAttr::BS_Undefined: 9980 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 9981 OutputBecomesInput, Fn); 9982 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 9983 OutputBecomesInput, Fn); 9984 break; 9985 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 9986 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 9987 OutputBecomesInput, Fn); 9988 break; 9989 case OMPDeclareSimdDeclAttr::BS_Inbranch: 9990 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 9991 OutputBecomesInput, Fn); 9992 break; 9993 } 9994 } 9995 } else { 9996 // If no user simdlen is provided, follow the AAVFABI rules for 9997 // generating the vector length. 9998 if (ISA == 's') { 9999 // SVE, section 3.4.1, item 1. 10000 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10001 OutputBecomesInput, Fn); 10002 } else { 10003 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10004 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10005 // two vector names depending on the use of the clause 10006 // `[not]inbranch`. 
10007 switch (State) { 10008 case OMPDeclareSimdDeclAttr::BS_Undefined: 10009 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10010 OutputBecomesInput, Fn); 10011 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10012 OutputBecomesInput, Fn); 10013 break; 10014 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10015 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10016 OutputBecomesInput, Fn); 10017 break; 10018 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10019 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10020 OutputBecomesInput, Fn); 10021 break; 10022 } 10023 } 10024 } 10025 } 10026 10027 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10028 llvm::Function *Fn) { 10029 ASTContext &C = CGM.getContext(); 10030 FD = FD->getMostRecentDecl(); 10031 // Map params to their positions in function decl. 10032 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10033 if (isa<CXXMethodDecl>(FD)) 10034 ParamPositions.try_emplace(FD, 0); 10035 unsigned ParamPos = ParamPositions.size(); 10036 for (const ParmVarDecl *P : FD->parameters()) { 10037 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10038 ++ParamPos; 10039 } 10040 while (FD) { 10041 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10042 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10043 // Mark uniform parameters. 10044 for (const Expr *E : Attr->uniforms()) { 10045 E = E->IgnoreParenImpCasts(); 10046 unsigned Pos; 10047 if (isa<CXXThisExpr>(E)) { 10048 Pos = ParamPositions[FD]; 10049 } else { 10050 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10051 ->getCanonicalDecl(); 10052 Pos = ParamPositions[PVD]; 10053 } 10054 ParamAttrs[Pos].Kind = Uniform; 10055 } 10056 // Get alignment info. 
10057 auto NI = Attr->alignments_begin(); 10058 for (const Expr *E : Attr->aligneds()) { 10059 E = E->IgnoreParenImpCasts(); 10060 unsigned Pos; 10061 QualType ParmTy; 10062 if (isa<CXXThisExpr>(E)) { 10063 Pos = ParamPositions[FD]; 10064 ParmTy = E->getType(); 10065 } else { 10066 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10067 ->getCanonicalDecl(); 10068 Pos = ParamPositions[PVD]; 10069 ParmTy = PVD->getType(); 10070 } 10071 ParamAttrs[Pos].Alignment = 10072 (*NI) 10073 ? (*NI)->EvaluateKnownConstInt(C) 10074 : llvm::APSInt::getUnsigned( 10075 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10076 .getQuantity()); 10077 ++NI; 10078 } 10079 // Mark linear parameters. 10080 auto SI = Attr->steps_begin(); 10081 auto MI = Attr->modifiers_begin(); 10082 for (const Expr *E : Attr->linears()) { 10083 E = E->IgnoreParenImpCasts(); 10084 unsigned Pos; 10085 if (isa<CXXThisExpr>(E)) { 10086 Pos = ParamPositions[FD]; 10087 } else { 10088 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10089 ->getCanonicalDecl(); 10090 Pos = ParamPositions[PVD]; 10091 } 10092 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 10093 ParamAttr.Kind = Linear; 10094 if (*SI) { 10095 Expr::EvalResult Result; 10096 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 10097 if (const auto *DRE = 10098 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 10099 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 10100 ParamAttr.Kind = LinearWithVarStride; 10101 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 10102 ParamPositions[StridePVD->getCanonicalDecl()]); 10103 } 10104 } 10105 } else { 10106 ParamAttr.StrideOrArg = Result.Val.getInt(); 10107 } 10108 } 10109 ++SI; 10110 ++MI; 10111 } 10112 llvm::APSInt VLENVal; 10113 SourceLocation ExprLoc; 10114 const Expr *VLENExpr = Attr->getSimdlen(); 10115 if (VLENExpr) { 10116 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 10117 ExprLoc = VLENExpr->getExprLoc(); 10118 } 10119 
OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 10120 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 10121 CGM.getTriple().getArch() == llvm::Triple::x86_64) { 10122 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 10123 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 10124 unsigned VLEN = VLENVal.getExtValue(); 10125 StringRef MangledName = Fn->getName(); 10126 if (CGM.getTarget().hasFeature("sve")) 10127 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10128 MangledName, 's', 128, Fn, ExprLoc); 10129 if (CGM.getTarget().hasFeature("neon")) 10130 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10131 MangledName, 'n', 128, Fn, ExprLoc); 10132 } 10133 } 10134 FD = FD->getPreviousDecl(); 10135 } 10136 } 10137 10138 namespace { 10139 /// Cleanup action for doacross support. 10140 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 10141 public: 10142 static const int DoacrossFinArgs = 2; 10143 10144 private: 10145 llvm::FunctionCallee RTLFn; 10146 llvm::Value *Args[DoacrossFinArgs]; 10147 10148 public: 10149 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 10150 ArrayRef<llvm::Value *> CallArgs) 10151 : RTLFn(RTLFn) { 10152 assert(CallArgs.size() == DoacrossFinArgs); 10153 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10154 } 10155 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10156 if (!CGF.HaveInsertPoint()) 10157 return; 10158 CGF.EmitRuntimeCall(RTLFn, Args); 10159 } 10160 }; 10161 } // namespace 10162 10163 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 10164 const OMPLoopDirective &D, 10165 ArrayRef<Expr *> NumIterations) { 10166 if (!CGF.HaveInsertPoint()) 10167 return; 10168 10169 ASTContext &C = CGM.getContext(); 10170 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 10171 RecordDecl *RD; 10172 if (KmpDimTy.isNull()) { 10173 // Build struct kmp_dim { // loop bounds info casted to 
kmp_int64 10174 // kmp_int64 lo; // lower 10175 // kmp_int64 up; // upper 10176 // kmp_int64 st; // stride 10177 // }; 10178 RD = C.buildImplicitRecord("kmp_dim"); 10179 RD->startDefinition(); 10180 addFieldToRecordDecl(C, RD, Int64Ty); 10181 addFieldToRecordDecl(C, RD, Int64Ty); 10182 addFieldToRecordDecl(C, RD, Int64Ty); 10183 RD->completeDefinition(); 10184 KmpDimTy = C.getRecordType(RD); 10185 } else { 10186 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 10187 } 10188 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 10189 QualType ArrayTy = 10190 C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0); 10191 10192 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 10193 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 10194 enum { LowerFD = 0, UpperFD, StrideFD }; 10195 // Fill dims with data. 10196 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 10197 LValue DimsLVal = CGF.MakeAddrLValue( 10198 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 10199 // dims.upper = num_iterations; 10200 LValue UpperLVal = CGF.EmitLValueForField( 10201 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 10202 llvm::Value *NumIterVal = 10203 CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]), 10204 D.getNumIterations()->getType(), Int64Ty, 10205 D.getNumIterations()->getExprLoc()); 10206 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 10207 // dims.stride = 1; 10208 LValue StrideLVal = CGF.EmitLValueForField( 10209 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 10210 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 10211 StrideLVal); 10212 } 10213 10214 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 10215 // kmp_int32 num_dims, struct kmp_dim * dims); 10216 llvm::Value *Args[] = { 10217 emitUpdateLocation(CGF, D.getBeginLoc()), 10218 getThreadID(CGF, D.getBeginLoc()), 10219 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 10220 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 10221 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 10222 CGM.VoidPtrTy)}; 10223 10224 llvm::FunctionCallee RTLFn = 10225 createRuntimeFunction(OMPRTL__kmpc_doacross_init); 10226 CGF.EmitRuntimeCall(RTLFn, Args); 10227 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 10228 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 10229 llvm::FunctionCallee FiniRTLFn = 10230 createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 10231 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 10232 llvm::makeArrayRef(FiniArgs)); 10233 } 10234 10235 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 10236 const OMPDependClause *C) { 10237 QualType Int64Ty = 10238 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 10239 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 10240 QualType ArrayTy = CGM.getContext().getConstantArrayType( 10241 Int64Ty, Size, ArrayType::Normal, 0); 10242 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 10243 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 10244 const Expr *CounterVal = C->getLoopData(I); 10245 assert(CounterVal); 10246 llvm::Value *CntVal = CGF.EmitScalarConversion( 10247 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 10248 CounterVal->getExprLoc()); 10249 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 10250 /*Volatile=*/false, Int64Ty); 10251 } 10252 llvm::Value *Args[] = { 10253 emitUpdateLocation(CGF, C->getBeginLoc()), 10254 getThreadID(CGF, C->getBeginLoc()), 10255 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 10256 llvm::FunctionCallee RTLFn; 10257 if (C->getDependencyKind() == OMPC_DEPEND_source) { 10258 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 10259 } else { 10260 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 10261 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 10262 } 
10263 CGF.EmitRuntimeCall(RTLFn, Args); 10264 } 10265 10266 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 10267 llvm::FunctionCallee Callee, 10268 ArrayRef<llvm::Value *> Args) const { 10269 assert(Loc.isValid() && "Outlined function call location must be valid."); 10270 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 10271 10272 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 10273 if (Fn->doesNotThrow()) { 10274 CGF.EmitNounwindRuntimeCall(Fn, Args); 10275 return; 10276 } 10277 } 10278 CGF.EmitRuntimeCall(Callee, Args); 10279 } 10280 10281 void CGOpenMPRuntime::emitOutlinedFunctionCall( 10282 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 10283 ArrayRef<llvm::Value *> Args) const { 10284 emitCall(CGF, Loc, OutlinedFn, Args); 10285 } 10286 10287 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 10288 const VarDecl *NativeParam, 10289 const VarDecl *TargetParam) const { 10290 return CGF.GetAddrOfLocalVar(NativeParam); 10291 } 10292 10293 namespace { 10294 /// Cleanup action for allocate support. 
10295 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 10296 public: 10297 static const int CleanupArgs = 3; 10298 10299 private: 10300 llvm::FunctionCallee RTLFn; 10301 llvm::Value *Args[CleanupArgs]; 10302 10303 public: 10304 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 10305 ArrayRef<llvm::Value *> CallArgs) 10306 : RTLFn(RTLFn) { 10307 assert(CallArgs.size() == CleanupArgs && 10308 "Size of arguments does not match."); 10309 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10310 } 10311 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10312 if (!CGF.HaveInsertPoint()) 10313 return; 10314 CGF.EmitRuntimeCall(RTLFn, Args); 10315 } 10316 }; 10317 } // namespace 10318 10319 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 10320 const VarDecl *VD) { 10321 if (!VD) 10322 return Address::invalid(); 10323 const VarDecl *CVD = VD->getCanonicalDecl(); 10324 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 10325 return Address::invalid(); 10326 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 10327 // Use the default allocation. 
10328 if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 10329 !AA->getAllocator()) 10330 return Address::invalid(); 10331 llvm::Value *Size; 10332 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 10333 if (CVD->getType()->isVariablyModifiedType()) { 10334 Size = CGF.getTypeSize(CVD->getType()); 10335 // Align the size: ((size + align - 1) / align) * align 10336 Size = CGF.Builder.CreateNUWAdd( 10337 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 10338 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 10339 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 10340 } else { 10341 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 10342 Size = CGM.getSize(Sz.alignTo(Align)); 10343 } 10344 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 10345 assert(AA->getAllocator() && 10346 "Expected allocator expression for non-default allocator."); 10347 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 10348 // According to the standard, the original allocator type is a enum (integer). 10349 // Convert to pointer type, if required. 
10350 if (Allocator->getType()->isIntegerTy()) 10351 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 10352 else if (Allocator->getType()->isPointerTy()) 10353 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 10354 CGM.VoidPtrTy); 10355 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 10356 10357 llvm::Value *Addr = 10358 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args, 10359 CVD->getName() + ".void.addr"); 10360 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 10361 Allocator}; 10362 llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free); 10363 10364 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 10365 llvm::makeArrayRef(FiniArgs)); 10366 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 10367 Addr, 10368 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 10369 CVD->getName() + ".addr"); 10370 return Address(Addr, Align); 10371 } 10372 10373 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 10374 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 10375 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 10376 llvm_unreachable("Not supported in SIMD-only mode"); 10377 } 10378 10379 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 10380 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 10381 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 10382 llvm_unreachable("Not supported in SIMD-only mode"); 10383 } 10384 10385 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 10386 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 10387 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 10388 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 10389 bool Tied, unsigned &NumberOfParts) { 10390 llvm_unreachable("Not supported in SIMD-only mode"); 10391 } 10392 10393 void 
CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  // Unsupported in SIMD-only mode: no argument is inspected.
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode: unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode: unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode: unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode: unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode: unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode: unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode: unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode: unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode: unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode: unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode: unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode: unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// no value is ever returned.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode: unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode: unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// no address is ever returned.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// no function is ever returned.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// no address is ever returned.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode: unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode: unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unsupported in SIMD-only mode: unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  // Only simple reductions are expected here; the request is then forwarded
  // to the base-class implementation.
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps,
Options); 10547 } 10548 10549 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 10550 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 10551 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 10552 llvm_unreachable("Not supported in SIMD-only mode"); 10553 } 10554 10555 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 10556 SourceLocation Loc, 10557 ReductionCodeGen &RCG, 10558 unsigned N) { 10559 llvm_unreachable("Not supported in SIMD-only mode"); 10560 } 10561 10562 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 10563 SourceLocation Loc, 10564 llvm::Value *ReductionsPtr, 10565 LValue SharedLVal) { 10566 llvm_unreachable("Not supported in SIMD-only mode"); 10567 } 10568 10569 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 10570 SourceLocation Loc) { 10571 llvm_unreachable("Not supported in SIMD-only mode"); 10572 } 10573 10574 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 10575 CodeGenFunction &CGF, SourceLocation Loc, 10576 OpenMPDirectiveKind CancelRegion) { 10577 llvm_unreachable("Not supported in SIMD-only mode"); 10578 } 10579 10580 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 10581 SourceLocation Loc, const Expr *IfCond, 10582 OpenMPDirectiveKind CancelRegion) { 10583 llvm_unreachable("Not supported in SIMD-only mode"); 10584 } 10585 10586 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 10587 const OMPExecutableDirective &D, StringRef ParentName, 10588 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 10589 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 10590 llvm_unreachable("Not supported in SIMD-only mode"); 10591 } 10592 10593 void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF, 10594 const OMPExecutableDirective &D, 10595 llvm::Function *OutlinedFn, 10596 llvm::Value *OutlinedFnID, 10597 const Expr *IfCond, 10598 const Expr *Device) { 10599 llvm_unreachable("Not supported in SIMD-only 
mode"); 10600 } 10601 10602 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 10603 llvm_unreachable("Not supported in SIMD-only mode"); 10604 } 10605 10606 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 10607 llvm_unreachable("Not supported in SIMD-only mode"); 10608 } 10609 10610 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 10611 return false; 10612 } 10613 10614 llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() { 10615 return nullptr; 10616 } 10617 10618 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 10619 const OMPExecutableDirective &D, 10620 SourceLocation Loc, 10621 llvm::Function *OutlinedFn, 10622 ArrayRef<llvm::Value *> CapturedVars) { 10623 llvm_unreachable("Not supported in SIMD-only mode"); 10624 } 10625 10626 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 10627 const Expr *NumTeams, 10628 const Expr *ThreadLimit, 10629 SourceLocation Loc) { 10630 llvm_unreachable("Not supported in SIMD-only mode"); 10631 } 10632 10633 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 10634 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10635 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 10636 llvm_unreachable("Not supported in SIMD-only mode"); 10637 } 10638 10639 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 10640 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10641 const Expr *Device) { 10642 llvm_unreachable("Not supported in SIMD-only mode"); 10643 } 10644 10645 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 10646 const OMPLoopDirective &D, 10647 ArrayRef<Expr *> NumIterations) { 10648 llvm_unreachable("Not supported in SIMD-only mode"); 10649 } 10650 10651 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 10652 const OMPDependClause *C) { 10653 llvm_unreachable("Not supported in SIMD-only mode"); 10654 } 10655 10656 const VarDecl * 10657 
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 10658 const VarDecl *NativeParam) const { 10659 llvm_unreachable("Not supported in SIMD-only mode"); 10660 } 10661 10662 Address 10663 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 10664 const VarDecl *NativeParam, 10665 const VarDecl *TargetParam) const { 10666 llvm_unreachable("Not supported in SIMD-only mode"); 10667 } 10668