//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/Basic/BitmaskEnum.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;

namespace {
/// Base class for handling code generation inside OpenMP regions.
/// Concrete subclasses exist for outlined 'parallel', outlined 'task',
/// inlined, and 'target' regions; see CGOpenMPRegionKind below.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions associated with a captured statement that is
  /// outlined into a helper function.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for inlined regions that have no associated CapturedStmt of
  /// their own (code is emitted directly into the enclosing function).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-switching point for untied tasks; default is a no-op and is
  /// overridden only by the task-outlined/inlined region infos.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if this region's directive may be cancelled ('cancel' construct).
  bool hasCancel() const { return HasCancel; }

  // Support for LLVM-style RTTI (isa/cast/dyn_cast).
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the outlined capture helper function.
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the part-id based switch used to resume an
  /// untied task at the correct point after it was rescheduled.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Parameter holding a pointer to the current task part id.
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; one case is added per switching point.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 resumes at the very beginning of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        // Store the next part id, run the user-provided callback, then add a
        // new resume point (switch case) at the current insertion block.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of resume points (switch cases) emitted so far.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are delegated to the enclosing (outer) region
/// info, if any.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to CGOpenMPRegionInfo, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs.
/// For these constructs, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-provided unique name for the target region helper.
  StringRef HelperName;
};

// Placeholder codegen callback: expression regions never emit a body.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; only globals need the
      // privatization treatment.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
/// Swaps in a CGOpenMPInlinedRegionInfo on construction and restores the
/// previous captured-statement state (lambda captures, block info) on
/// destruction.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    // Stash lambda/block capture state so the inlined region starts clean.
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file, the
///                            function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// Identifiers for the libomp runtime entry points emitted by this file.
/// Each enumerator's comment gives the C prototype of the runtime call.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  /// Run the action's Exit hook when the cleanup fires; skipped when there is
  /// no valid IR insertion point to emit into.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

/// Run the stored codegen callback inside a fresh cleanups scope. If a
/// pre/post action is attached, its Exit hook is registered as an EH-safe
/// cleanup so it runs even on exceptional exit.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction. Returns nullptr when \p ReductionOp does not
/// have the expected call-through-opaque-value shape.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

/// Emit initialization of \p Private using the initializer of the
/// user-defined reduction \p DRD. If the UDR has an explicit initializer
/// expression, it is evaluated with its LHS/RHS placeholders bound to
/// \p Private and \p Original; otherwise \p Private is zero-initialized.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Map the UDR's placeholder variables onto the actual private/original
    // storage before evaluating the initializer call.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer clause: materialize a zero constant of the element type
    // and copy it into the private storage.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  // SrcBegin is only needed when a UDR reads the original array's elements.
  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current element pointer across loop iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit the lvalue of the shared (original) reduction item.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

/// For an array section, emit the lvalue of its upper bound; for any other
/// expression an invalid LValue() is returned (no upper bound exists).
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

/// Initialize the private array reduction item \p N element-by-element,
/// using either the UDR initializer (when applicable) or the private
/// VarDecl's own initializer.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the UDR path when a UDR is present and either has an explicit
  // initializer or the private decl has no default init of its own.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress());
}

/// Build per-clause bookkeeping from the parallel arrays of shared refs,
/// private copies, and reduction operations.
/// Assumes Privates and ReductionOps have (at least) Shareds.size() elements
/// — TODO confirm with callers.
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  auto IPriv = Privates.begin();
  auto IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IPriv, *IRed);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

/// Emit and record the shared lvalue (and, for array sections, its upper
/// bound) for clause \p N. Must be called in clause order — see the assert.
void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
  SharedAddresses.emplace_back(First, Second);
}

/// Compute and record the size (in chars and in elements) of reduction item
/// \p N, emitting runtime arithmetic for variably modified (VLA) types.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: size is known statically, no element count needed.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Elements = (UB - LB) + 1; bytes = elements * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
                                     SharedAddresses[N].first.getPointer());
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole VLA: bytes from the type, element count by exact division.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed count so the variably
  // modified type can be (re)emitted in this function.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Re-emit the variably modified private type of item \p N using an
/// externally supplied element count \p Size (e.g. read back from a task
/// data structure). No-op for constant-size items.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit initialization of the private copy of reduction item \p N.
/// Dispatch order: array types get element-wise init; a usable UDR
/// initializer wins next; otherwise \p DefaultInit is given a chance and,
/// failing that, the private decl's own nontrivial initializer is emitted.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Retype both addresses to the exact memory types before storing.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Return true if the private copy of item \p N has a nontrivial destructor
/// and therefore needs a cleanup emitted.
bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

/// Push a destructor cleanup for the private copy of item \p N if its type
/// requires destruction.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  // NOTE(review): DTorKind duplicates the computation inside needCleanups(N)
  // below — candidate for folding into a single check.
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Follow pointer/reference levels of \p BaseTy, loading through each, until
/// reaching \p ElTy; returns an lvalue of \p BaseLV's address retyped to
/// \p ElTy's memory type.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Inverse of loadToBegin: rebuild a chain of pointer temporaries so that
/// \p Addr (an element address) can stand in for a base of type \p BaseTy.
/// Each pointer/reference level gets a memory temporary; the innermost one
/// stores \p Addr, and the outermost temporary is returned.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp; // Remember the outermost temp to return.
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  // No indirection levels: the address itself is the base.
  return Address(Addr, BaseLVAlignment);
}

/// For an array-section or array-subscript \p Ref, strip all
/// section/subscript layers to the underlying DeclRefExpr (returned in
/// \p DE) and its VarDecl. Returns nullptr for other expression forms,
/// leaving \p DE untouched.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

/// Given the private storage for item \p N, compute the address that plays
/// the role of the original base variable: offset the private pointer by
/// the distance between the section start and the base, then wrap it in the
/// pointer-temporary chain expected by the base's type.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Adjustment = &base_element - &section_start (in elements).
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress().getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress().getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: no adjustment needed.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

/// True if item \p N is a UDR reduction with an explicit initializer clause.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

/// The thread-id variable is a kmp_int32* parameter in outlined parallel
/// regions; load through it to get the id lvalue.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

/// In task-outlined regions the thread id is passed by value (kmp_int32),
/// so no pointer load is needed — contrast with the base-class override.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Append a public, non-bitfield, non-mutable field of type \p FieldTy to
/// the implicit record \p DC and return it.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

/// Build the implicit ident_t record (the location descriptor passed to all
/// kmpc entry points), cache its clang/LLVM types, and load any offloading
/// metadata from the host IR.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  // kmp_critical_name is an array of 8 i32 in the runtime ABI.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}

/// Drop cached state and erase emitted-but-unused non-target global variable
/// declarations (kept earlier only for debug info).
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Only remove pure declarations that nothing references.
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

/// Join \p Parts into a runtime symbol name: FirstSeparator before the first
/// part, Separator between the rest (e.g. ".omp.init" or "_omp_init").
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return OS.str();
}

/// Emit the combiner or initializer helper of a declare-reduction directive
/// as an always-inline internal function "void f(Ty *restrict out,
/// Ty *restrict in)", mapping the directive's omp_in/omp_out (or
/// omp_orig/omp_priv) variables onto the two parameters.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Force inlining: these helpers are tiny glue called from hot reduction
  // paths, so strip any -O0 noinline/optnone attributes first.
  Fn->removeFnAttr(llvm::Attribute::NoInline);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // For initializers without a call-style init, run omp_priv's own
  // (nontrivial) initializer into the out parameter.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit (once) the combiner and optional initializer functions for UDR \p D
/// and cache them in UDRMap; when emitted inside a function, also track the
/// UDR per-function so functionFinished() can evict it.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only call-style initializers pass the expr; direct-init relies on the
    // omp_priv VarDecl's own initializer inside emitCombinerOrInitializer.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Return {combiner, initializer} for \p D, emitting them on first use.
/// The initializer element may be null if the UDR has no initializer clause.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

/// Outline the body of a 'parallel' or 'teams' region into a function taking
/// the global/bound thread ids; detects cancellation support from the
/// concrete directive kind.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Only directives that admit '#pragma omp cancel parallel' set HasCancel.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}

/// Outline the 'parallel' region of \p D. See
/// emitParallelOrTeamsOutlinedFunction for the shared mechanics.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Outline the 'teams' region of \p D; identical plumbing to the parallel
/// case apart from the captured region kind.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Outline a 'task'/'taskloop' region. For untied tasks, installs an action
/// that re-enqueues the task via __kmpc_omp_task at untied switch points and
/// reports the resulting number of task parts through \p NumberOfParts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only plain 'task' can carry cancel; other task-generating directives
  // fall through with HasCancel == false.
  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

/// Append \p Data's constants to \p Fields following the LLVM struct layout
/// of \p RD, inserting null padding elements for any LLVM fields that have
/// no corresponding AST field.
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  // NOTE(review): CIBuilder is never used in this function (Fields is built
  // by the caller) — candidate for removal; CGM would then be unused too.
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill the alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}

/// Create a global variable of record type \p Ty initialized from \p Data
/// (one constant per AST field); extra arguments are forwarded to
/// ConstantStructBuilder::finishAndCreateGlobal (e.g. linkage).
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

/// Like createGlobalStruct, but nests the struct constant inside an
/// already-open aggregate builder \p Parent instead of creating a global.
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

/// Return (creating and caching on first use) the default ident_t global
/// for the given flags + reserved_2 flags combination.
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Field order mirrors the ident_t record built in the constructor:
    // reserved_1, flags, reserved_2, reserved_3, psource.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    // Identical default locations may be merged across TUs.
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}

/// Install the per-function "service" insertion point: a dummy bitcast
/// instruction that later runtime setup (thread id load, ident_t memcpy) is
/// emitted before. Placed either at the current point or right after the
/// alloca insertion point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  // A no-op i32->i32 bitcast of undef serves as a removable placeholder.
  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

/// Remove the placeholder service insertion point, if one was created.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    // Clear the map entry before erasing so no dangling pointer remains.
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

/// Return an ident_t* describing \p Loc for passing to kmpc entry points.
/// Without debug info (or with an invalid location) this is a shared
/// constant global; otherwise a per-function local ident_t is kept and its
/// psource field updated to ";file;function;line;column;;" at each call site.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Seed the local ident_t from the default one at the service insertion
    // point (function entry), so only psource varies per call site.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Cache the formatted location string per raw source location.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}

/// Return the OpenMP global thread id for the current function, loading it
/// from the outlined region's thread-id parameter when possible and falling
/// back to a cached __kmpc_global_thread_num call at function entry.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
      !CGF.getLangOpts().CXXExceptions ||
      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    if (auto *OMPRegionInfo =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
      if (OMPRegionInfo->getThreadIDVariable()) {
        // Check if this an outlined function with thread id passed as argument.
        LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
1617 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 1618 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1619 Elem.second.ThreadID = ThreadID; 1620 } 1621 return ThreadID; 1622 } 1623 } 1624 } 1625 1626 // This is not an outlined function region - need to call __kmpc_int32 1627 // kmpc_global_thread_num(ident_t *loc). 1628 // Generate thread id value and cache this value for use across the 1629 // function. 1630 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1631 if (!Elem.second.ServiceInsertPt) 1632 setLocThreadIdInsertPt(CGF); 1633 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1634 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1635 llvm::CallInst *Call = CGF.Builder.CreateCall( 1636 createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1637 emitUpdateLocation(CGF, Loc)); 1638 Call->setCallingConv(CGF.getRuntimeCC()); 1639 Elem.second.ThreadID = Call; 1640 return Call; 1641 } 1642 1643 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1644 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1645 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1646 clearLocThreadIdInsertPt(CGF); 1647 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1648 } 1649 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1650 for(auto *D : FunctionUDRMap[CGF.CurFn]) 1651 UDRMap.erase(D); 1652 FunctionUDRMap.erase(CGF.CurFn); 1653 } 1654 } 1655 1656 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1657 return IdentTy->getPointerTo(); 1658 } 1659 1660 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1661 if (!Kmpc_MicroTy) { 1662 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1663 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1664 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1665 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1666 } 1667 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1668 } 1669 1670 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1671 llvm::FunctionCallee RTLFn = nullptr; 1672 switch (static_cast<OpenMPRTLFunction>(Function)) { 1673 case OMPRTL__kmpc_fork_call: { 1674 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1675 // microtask, ...); 1676 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1677 getKmpc_MicroPointerTy()}; 1678 auto *FnTy = 1679 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1680 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1681 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 1682 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 1683 llvm::LLVMContext &Ctx = F->getContext(); 1684 llvm::MDBuilder MDB(Ctx); 1685 // Annotate the callback behavior of the __kmpc_fork_call: 1686 // - The callback callee is argument number 2 (microtask). 1687 // - The first two arguments of the callback callee are unknown (-1). 1688 // - All variadic arguments to the __kmpc_fork_call are passed to the 1689 // callback callee. 
1690 F->addMetadata( 1691 llvm::LLVMContext::MD_callback, 1692 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1693 2, {-1, -1}, 1694 /* VarArgsArePassed */ true)})); 1695 } 1696 } 1697 break; 1698 } 1699 case OMPRTL__kmpc_global_thread_num: { 1700 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1701 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1702 auto *FnTy = 1703 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1704 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1705 break; 1706 } 1707 case OMPRTL__kmpc_threadprivate_cached: { 1708 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1709 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1710 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1711 CGM.VoidPtrTy, CGM.SizeTy, 1712 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1713 auto *FnTy = 1714 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1715 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1716 break; 1717 } 1718 case OMPRTL__kmpc_critical: { 1719 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1720 // kmp_critical_name *crit); 1721 llvm::Type *TypeParams[] = { 1722 getIdentTyPointerTy(), CGM.Int32Ty, 1723 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1724 auto *FnTy = 1725 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1726 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1727 break; 1728 } 1729 case OMPRTL__kmpc_critical_with_hint: { 1730 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1731 // kmp_critical_name *crit, uintptr_t hint); 1732 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1733 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1734 CGM.IntPtrTy}; 1735 auto *FnTy = 1736 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1737 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__kmpc_critical_with_hint"); 1738 break; 1739 } 1740 case OMPRTL__kmpc_threadprivate_register: { 1741 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1742 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1743 // typedef void *(*kmpc_ctor)(void *); 1744 auto *KmpcCtorTy = 1745 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1746 /*isVarArg*/ false)->getPointerTo(); 1747 // typedef void *(*kmpc_cctor)(void *, void *); 1748 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1749 auto *KmpcCopyCtorTy = 1750 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1751 /*isVarArg*/ false) 1752 ->getPointerTo(); 1753 // typedef void (*kmpc_dtor)(void *); 1754 auto *KmpcDtorTy = 1755 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1756 ->getPointerTo(); 1757 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1758 KmpcCopyCtorTy, KmpcDtorTy}; 1759 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1760 /*isVarArg*/ false); 1761 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1762 break; 1763 } 1764 case OMPRTL__kmpc_end_critical: { 1765 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1766 // kmp_critical_name *crit); 1767 llvm::Type *TypeParams[] = { 1768 getIdentTyPointerTy(), CGM.Int32Ty, 1769 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1770 auto *FnTy = 1771 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1772 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1773 break; 1774 } 1775 case OMPRTL__kmpc_cancel_barrier: { 1776 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1777 // global_tid); 1778 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1779 auto *FnTy = 1780 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1781 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1782 break; 1783 } 1784 case 
OMPRTL__kmpc_barrier: { 1785 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1786 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1787 auto *FnTy = 1788 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1789 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1790 break; 1791 } 1792 case OMPRTL__kmpc_for_static_fini: { 1793 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1794 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1795 auto *FnTy = 1796 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1797 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1798 break; 1799 } 1800 case OMPRTL__kmpc_push_num_threads: { 1801 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1802 // kmp_int32 num_threads) 1803 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1804 CGM.Int32Ty}; 1805 auto *FnTy = 1806 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1807 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1808 break; 1809 } 1810 case OMPRTL__kmpc_serialized_parallel: { 1811 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1812 // global_tid); 1813 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1814 auto *FnTy = 1815 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1816 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1817 break; 1818 } 1819 case OMPRTL__kmpc_end_serialized_parallel: { 1820 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1821 // global_tid); 1822 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1823 auto *FnTy = 1824 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1825 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1826 break; 1827 } 1828 case OMPRTL__kmpc_flush: { 1829 // Build void 
__kmpc_flush(ident_t *loc); 1830 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1831 auto *FnTy = 1832 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1833 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1834 break; 1835 } 1836 case OMPRTL__kmpc_master: { 1837 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1838 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1839 auto *FnTy = 1840 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1841 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1842 break; 1843 } 1844 case OMPRTL__kmpc_end_master: { 1845 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1846 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1847 auto *FnTy = 1848 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1849 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1850 break; 1851 } 1852 case OMPRTL__kmpc_omp_taskyield: { 1853 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1854 // int end_part); 1855 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1856 auto *FnTy = 1857 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1858 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1859 break; 1860 } 1861 case OMPRTL__kmpc_single: { 1862 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1863 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1864 auto *FnTy = 1865 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1866 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1867 break; 1868 } 1869 case OMPRTL__kmpc_end_single: { 1870 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1871 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1872 auto *FnTy = 1873 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1874 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1875 break; 1876 } 1877 case OMPRTL__kmpc_omp_task_alloc: { 1878 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1879 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1880 // kmp_routine_entry_t *task_entry); 1881 assert(KmpRoutineEntryPtrTy != nullptr && 1882 "Type kmp_routine_entry_t must be created."); 1883 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1884 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1885 // Return void * and then cast to particular kmp_task_t type. 1886 auto *FnTy = 1887 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1888 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1889 break; 1890 } 1891 case OMPRTL__kmpc_omp_task: { 1892 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1893 // *new_task); 1894 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1895 CGM.VoidPtrTy}; 1896 auto *FnTy = 1897 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1898 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 1899 break; 1900 } 1901 case OMPRTL__kmpc_copyprivate: { 1902 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 1903 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 1904 // kmp_int32 didit); 1905 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1906 auto *CpyFnTy = 1907 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 1908 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 1909 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 1910 CGM.Int32Ty}; 1911 auto *FnTy = 1912 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1913 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 1914 
break; 1915 } 1916 case OMPRTL__kmpc_reduce: { 1917 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 1918 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 1919 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 1920 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1921 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1922 /*isVarArg=*/false); 1923 llvm::Type *TypeParams[] = { 1924 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1925 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1926 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1927 auto *FnTy = 1928 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1929 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 1930 break; 1931 } 1932 case OMPRTL__kmpc_reduce_nowait: { 1933 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 1934 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 1935 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 1936 // *lck); 1937 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1938 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1939 /*isVarArg=*/false); 1940 llvm::Type *TypeParams[] = { 1941 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1942 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1943 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1944 auto *FnTy = 1945 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1946 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 1947 break; 1948 } 1949 case OMPRTL__kmpc_end_reduce: { 1950 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 1951 // kmp_critical_name *lck); 1952 llvm::Type *TypeParams[] = { 1953 getIdentTyPointerTy(), CGM.Int32Ty, 1954 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1955 auto *FnTy = 1956 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1957 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 1958 break; 1959 } 1960 case OMPRTL__kmpc_end_reduce_nowait: { 1961 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 1962 // kmp_critical_name *lck); 1963 llvm::Type *TypeParams[] = { 1964 getIdentTyPointerTy(), CGM.Int32Ty, 1965 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1966 auto *FnTy = 1967 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1968 RTLFn = 1969 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 1970 break; 1971 } 1972 case OMPRTL__kmpc_omp_task_begin_if0: { 1973 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1974 // *new_task); 1975 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1976 CGM.VoidPtrTy}; 1977 auto *FnTy = 1978 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1979 RTLFn = 1980 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 1981 break; 1982 } 1983 case OMPRTL__kmpc_omp_task_complete_if0: { 1984 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1985 // *new_task); 1986 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1987 CGM.VoidPtrTy}; 1988 auto *FnTy = 1989 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1990 RTLFn = CGM.CreateRuntimeFunction(FnTy, 1991 /*Name=*/"__kmpc_omp_task_complete_if0"); 1992 break; 1993 } 1994 case OMPRTL__kmpc_ordered: { 1995 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 1996 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1997 auto *FnTy = 1998 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1999 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 2000 break; 2001 } 2002 case OMPRTL__kmpc_end_ordered: { 2003 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 2004 llvm::Type *TypeParams[] 
= {getIdentTyPointerTy(), CGM.Int32Ty}; 2005 auto *FnTy = 2006 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2007 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 2008 break; 2009 } 2010 case OMPRTL__kmpc_omp_taskwait: { 2011 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 2012 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2013 auto *FnTy = 2014 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2015 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 2016 break; 2017 } 2018 case OMPRTL__kmpc_taskgroup: { 2019 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 2020 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2021 auto *FnTy = 2022 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2023 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 2024 break; 2025 } 2026 case OMPRTL__kmpc_end_taskgroup: { 2027 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 2028 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2029 auto *FnTy = 2030 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2031 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 2032 break; 2033 } 2034 case OMPRTL__kmpc_push_proc_bind: { 2035 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 2036 // int proc_bind) 2037 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2038 auto *FnTy = 2039 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2040 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 2041 break; 2042 } 2043 case OMPRTL__kmpc_omp_task_with_deps: { 2044 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2045 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2046 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 2047 
llvm::Type *TypeParams[] = { 2048 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 2049 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 2050 auto *FnTy = 2051 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2052 RTLFn = 2053 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 2054 break; 2055 } 2056 case OMPRTL__kmpc_omp_wait_deps: { 2057 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2058 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2059 // kmp_depend_info_t *noalias_dep_list); 2060 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2061 CGM.Int32Ty, CGM.VoidPtrTy, 2062 CGM.Int32Ty, CGM.VoidPtrTy}; 2063 auto *FnTy = 2064 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2065 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2066 break; 2067 } 2068 case OMPRTL__kmpc_cancellationpoint: { 2069 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2070 // global_tid, kmp_int32 cncl_kind) 2071 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2072 auto *FnTy = 2073 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2074 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2075 break; 2076 } 2077 case OMPRTL__kmpc_cancel: { 2078 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2079 // kmp_int32 cncl_kind) 2080 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2081 auto *FnTy = 2082 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2083 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2084 break; 2085 } 2086 case OMPRTL__kmpc_push_num_teams: { 2087 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 2088 // kmp_int32 num_teams, kmp_int32 num_threads) 2089 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2090 CGM.Int32Ty}; 2091 
auto *FnTy = 2092 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2093 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2094 break; 2095 } 2096 case OMPRTL__kmpc_fork_teams: { 2097 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2098 // microtask, ...); 2099 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2100 getKmpc_MicroPointerTy()}; 2101 auto *FnTy = 2102 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2103 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2104 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 2105 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 2106 llvm::LLVMContext &Ctx = F->getContext(); 2107 llvm::MDBuilder MDB(Ctx); 2108 // Annotate the callback behavior of the __kmpc_fork_teams: 2109 // - The callback callee is argument number 2 (microtask). 2110 // - The first two arguments of the callback callee are unknown (-1). 2111 // - All variadic arguments to the __kmpc_fork_teams are passed to the 2112 // callback callee. 
2113 F->addMetadata( 2114 llvm::LLVMContext::MD_callback, 2115 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 2116 2, {-1, -1}, 2117 /* VarArgsArePassed */ true)})); 2118 } 2119 } 2120 break; 2121 } 2122 case OMPRTL__kmpc_taskloop: { 2123 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 2124 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 2125 // sched, kmp_uint64 grainsize, void *task_dup); 2126 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2127 CGM.IntTy, 2128 CGM.VoidPtrTy, 2129 CGM.IntTy, 2130 CGM.Int64Ty->getPointerTo(), 2131 CGM.Int64Ty->getPointerTo(), 2132 CGM.Int64Ty, 2133 CGM.IntTy, 2134 CGM.IntTy, 2135 CGM.Int64Ty, 2136 CGM.VoidPtrTy}; 2137 auto *FnTy = 2138 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2139 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 2140 break; 2141 } 2142 case OMPRTL__kmpc_doacross_init: { 2143 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 2144 // num_dims, struct kmp_dim *dims); 2145 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2146 CGM.Int32Ty, 2147 CGM.Int32Ty, 2148 CGM.VoidPtrTy}; 2149 auto *FnTy = 2150 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2151 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2152 break; 2153 } 2154 case OMPRTL__kmpc_doacross_fini: { 2155 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2156 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2157 auto *FnTy = 2158 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2159 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2160 break; 2161 } 2162 case OMPRTL__kmpc_doacross_post: { 2163 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2164 // *vec); 2165 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2166 CGM.Int64Ty->getPointerTo()}; 2167 auto *FnTy = 
2168 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2169 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2170 break; 2171 } 2172 case OMPRTL__kmpc_doacross_wait: { 2173 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2174 // *vec); 2175 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2176 CGM.Int64Ty->getPointerTo()}; 2177 auto *FnTy = 2178 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2179 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2180 break; 2181 } 2182 case OMPRTL__kmpc_task_reduction_init: { 2183 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void 2184 // *data); 2185 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2186 auto *FnTy = 2187 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2188 RTLFn = 2189 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); 2190 break; 2191 } 2192 case OMPRTL__kmpc_task_reduction_get_th_data: { 2193 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2194 // *d); 2195 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2196 auto *FnTy = 2197 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2198 RTLFn = CGM.CreateRuntimeFunction( 2199 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2200 break; 2201 } 2202 case OMPRTL__kmpc_alloc: { 2203 // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t 2204 // al); omp_allocator_handle_t type is void *. 
2205 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy}; 2206 auto *FnTy = 2207 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2208 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc"); 2209 break; 2210 } 2211 case OMPRTL__kmpc_free: { 2212 // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t 2213 // al); omp_allocator_handle_t type is void *. 2214 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2215 auto *FnTy = 2216 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2217 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); 2218 break; 2219 } 2220 case OMPRTL__kmpc_push_target_tripcount: { 2221 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 2222 // size); 2223 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; 2224 llvm::FunctionType *FnTy = 2225 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2226 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); 2227 break; 2228 } 2229 case OMPRTL__tgt_target: { 2230 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2231 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2232 // *arg_types); 2233 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2234 CGM.VoidPtrTy, 2235 CGM.Int32Ty, 2236 CGM.VoidPtrPtrTy, 2237 CGM.VoidPtrPtrTy, 2238 CGM.SizeTy->getPointerTo(), 2239 CGM.Int64Ty->getPointerTo()}; 2240 auto *FnTy = 2241 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2242 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2243 break; 2244 } 2245 case OMPRTL__tgt_target_nowait: { 2246 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2247 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 2248 // int64_t *arg_types); 2249 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2250 CGM.VoidPtrTy, 2251 CGM.Int32Ty, 2252 CGM.VoidPtrPtrTy, 2253 
CGM.VoidPtrPtrTy, 2254 CGM.SizeTy->getPointerTo(), 2255 CGM.Int64Ty->getPointerTo()}; 2256 auto *FnTy = 2257 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2258 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2259 break; 2260 } 2261 case OMPRTL__tgt_target_teams: { 2262 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2263 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 2264 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2265 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2266 CGM.VoidPtrTy, 2267 CGM.Int32Ty, 2268 CGM.VoidPtrPtrTy, 2269 CGM.VoidPtrPtrTy, 2270 CGM.SizeTy->getPointerTo(), 2271 CGM.Int64Ty->getPointerTo(), 2272 CGM.Int32Ty, 2273 CGM.Int32Ty}; 2274 auto *FnTy = 2275 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2276 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2277 break; 2278 } 2279 case OMPRTL__tgt_target_teams_nowait: { 2280 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2281 // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t 2282 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2283 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2284 CGM.VoidPtrTy, 2285 CGM.Int32Ty, 2286 CGM.VoidPtrPtrTy, 2287 CGM.VoidPtrPtrTy, 2288 CGM.SizeTy->getPointerTo(), 2289 CGM.Int64Ty->getPointerTo(), 2290 CGM.Int32Ty, 2291 CGM.Int32Ty}; 2292 auto *FnTy = 2293 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2294 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2295 break; 2296 } 2297 case OMPRTL__tgt_register_lib: { 2298 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 2299 QualType ParamTy = 2300 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2301 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2302 auto *FnTy = 2303 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2304 
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 2305 break; 2306 } 2307 case OMPRTL__tgt_unregister_lib: { 2308 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 2309 QualType ParamTy = 2310 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2311 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2312 auto *FnTy = 2313 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2314 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 2315 break; 2316 } 2317 case OMPRTL__tgt_target_data_begin: { 2318 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2319 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2320 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2321 CGM.Int32Ty, 2322 CGM.VoidPtrPtrTy, 2323 CGM.VoidPtrPtrTy, 2324 CGM.SizeTy->getPointerTo(), 2325 CGM.Int64Ty->getPointerTo()}; 2326 auto *FnTy = 2327 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2328 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2329 break; 2330 } 2331 case OMPRTL__tgt_target_data_begin_nowait: { 2332 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2333 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2334 // *arg_types); 2335 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2336 CGM.Int32Ty, 2337 CGM.VoidPtrPtrTy, 2338 CGM.VoidPtrPtrTy, 2339 CGM.SizeTy->getPointerTo(), 2340 CGM.Int64Ty->getPointerTo()}; 2341 auto *FnTy = 2342 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2343 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2344 break; 2345 } 2346 case OMPRTL__tgt_target_data_end: { 2347 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2348 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2349 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2350 CGM.Int32Ty, 2351 CGM.VoidPtrPtrTy, 2352 CGM.VoidPtrPtrTy, 
2353 CGM.SizeTy->getPointerTo(), 2354 CGM.Int64Ty->getPointerTo()}; 2355 auto *FnTy = 2356 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2357 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2358 break; 2359 } 2360 case OMPRTL__tgt_target_data_end_nowait: { 2361 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2362 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2363 // *arg_types); 2364 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2365 CGM.Int32Ty, 2366 CGM.VoidPtrPtrTy, 2367 CGM.VoidPtrPtrTy, 2368 CGM.SizeTy->getPointerTo(), 2369 CGM.Int64Ty->getPointerTo()}; 2370 auto *FnTy = 2371 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2372 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2373 break; 2374 } 2375 case OMPRTL__tgt_target_data_update: { 2376 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2377 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2378 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2379 CGM.Int32Ty, 2380 CGM.VoidPtrPtrTy, 2381 CGM.VoidPtrPtrTy, 2382 CGM.SizeTy->getPointerTo(), 2383 CGM.Int64Ty->getPointerTo()}; 2384 auto *FnTy = 2385 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2386 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2387 break; 2388 } 2389 case OMPRTL__tgt_target_data_update_nowait: { 2390 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2391 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2392 // *arg_types); 2393 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2394 CGM.Int32Ty, 2395 CGM.VoidPtrPtrTy, 2396 CGM.VoidPtrPtrTy, 2397 CGM.SizeTy->getPointerTo(), 2398 CGM.Int64Ty->getPointerTo()}; 2399 auto *FnTy = 2400 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2401 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2402 break; 2403 } 
  }
  }
  assert(RTLFn && "Unable to find OpenMP runtime function");
  return RTLFn;
}

/// Returns a declaration of the size-appropriate
/// __kmpc_for_static_init_{4,4u,8,8u} runtime entry, selected by the
/// induction-variable width (\p IVSize, 32 or 64 bits) and signedness
/// (\p IVSigned).
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                            : "__kmpc_for_static_init_4u")
                                : (IVSigned ? "__kmpc_for_static_init_8"
                                            : "__kmpc_for_static_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    CGM.Int32Ty,                               // schedtype
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy,                                     // p_stride
    ITy,                                       // incr
    ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Returns a declaration of the __kmpc_dispatch_init_{4,4u,8,8u} runtime
/// entry matching \p IVSize / \p IVSigned (dynamic/guided loop scheduling).
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Returns a declaration of the __kmpc_dispatch_fini_{4,4u,8,8u} runtime
/// entry matching \p IVSize / \p IVSigned. Note the parameter list does not
/// depend on the IV type; only the runtime entry name does.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(), // loc
    CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Returns a declaration of the __kmpc_dispatch_next_{4,4u,8,8u} runtime
/// entry matching \p IVSize / \p IVSigned; it returns i32 (non-zero while
/// more chunks remain) and fills the p_* out-parameters.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Returns the address of the "<mangled-name>_decl_tgt_link_ptr" indirection
/// pointer for a variable marked 'declare target link', creating it lazily on
/// first use. On the host the pointer is external and initialized with the
/// address of the original global; returns an invalid Address when simd-only
/// mode is active or the variable is not MT_Link.
Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);
      if (!CGM.getLangOpts().OpenMPIsDevice) {
        auto *GV = cast<llvm::GlobalVariable>(Ptr);
        GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      }
      // Keep the pointer alive and make it visible to the offload registration
      // machinery.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

/// Returns the "<mangled-name>.cache." internal global used by
/// __kmpc_threadprivate_cached for the threadprivate variable \p VD.
/// Only meaningful when TLS-based threadprivate lowering is not in use.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

/// Returns the address of the thread-local copy of the threadprivate
/// variable \p VD for the current thread. With native TLS this is simply
/// \p VDAddr; otherwise it emits a call to __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                               const VarDecl *VD,
                                               Address VDAddr,
                                               SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
                 VDAddr.getAlignment());
}

/// Emits the runtime calls that register the ctor/copy-ctor/dtor for a
/// threadprivate variable located at \p VDAddr. Also forces OpenMP runtime
/// initialization via __kmpc_global_thread_num.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}

/// Emits (at most once per variable) the ctor/dtor helper functions for a
/// threadprivate variable and registers them with the runtime.
/// \param PerformInit Whether the declaration's initializer must be re-run
///        for each thread's copy.
/// \param CGF If non-null, registration is emitted inline into this function;
///        otherwise a dedicated "__omp_threadprivate_init_" function is
///        created and returned for the global-init machinery.
/// \return The init function, or nullptr when none is needed (TLS lowering,
///         no definition, already emitted, or no ctor/dtor required).
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor helper returns the destination pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      // Pass a typed null for a missing ctor.
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      // Pass a typed null for a missing dtor.
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: synthesize a dedicated init function that
      // performs the registration and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  // The device/file pair comes from the filesystem's unique ID for the file
  // containing the entry; failure to stat the file is diagnosed, not fatal.
  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

/// Emits, for a 'declare target' (non-link) variable with a definition, the
/// device-side ctor/dtor helpers (or host-side placeholder globals) named
/// "__omp_offloading_<dev>_<file>_<var>_l<line>_{ctor,dtor}" and registers
/// them as offload entries.
/// \return true when compiling for the device (LangOpts.OpenMPIsDevice).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Nothing to do for non-declare-target or 'link' variables, or when the
  // variable was already processed (tracked by mangled name).
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link)
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();

  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host emit only a placeholder byte whose address serves as the
      // unique ID for the offload-entry table.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder, mirroring the ctor case above.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

/// Returns the per-thread address of a compiler-generated ("artificial")
/// threadprivate value identified by \p Name, backed by a lazily created
/// "<Name>.artificial." global plus a runtime cache, via
/// __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*IsSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getPointerAlign());
}

/// Emits code for an OpenMP 'if' clause: \p ThenGen under \p Cond, \p ElseGen
/// otherwise. Constant-folds \p Cond when possible so only the live arm is
/// emitted.
void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                      const RegionCodeGenTy &ThenGen,
                                      const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// Emits a call to the outlined 'parallel' region \p OutlinedFn. With no (or
/// true) \p IfCond the region is run via __kmpc_fork_call; otherwise the else
/// arm serializes the region between __kmpc_serialized_parallel /
/// __kmpc_end_serialized_parallel and calls the outlined function directly
/// with gtid/bound-tid pointing at a zero temporary.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&gtid, &zero, CapturedStruct);
    Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                                        /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  // Inside an outlined OpenMP region, reuse the region's thread-ID variable.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();

  // Serial code: materialize the global thread id into an i32 temporary.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

/// Returns (creating on first use) a zero-initialized module-level global of
/// type \p Ty with common linkage, uniqued by the fully rendered \p Name.
/// Asserts that a cached entry's type matches the requested type.
llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

/// Returns the ".gomp_critical_user_<name>.var" lock variable used by the
/// __kmpc_critical family for the critical section \p CriticalName.
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
/// Wraps a region body with an enter runtime call and an exit runtime call;
/// when \p Conditional is set the body is only emitted if the enter call
/// returns non-zero (the caller must invoke Done() to close the branch).
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

/// Emits an OpenMP 'critical' region guarded by __kmpc_critical /
/// __kmpc_end_critical (with __kmpc_critical_with_hint when \p Hint is given).
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
  }
  CommonActionTy Action(
      createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
                                 : OMPRTL__kmpc_critical),
      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// Emits an OpenMP 'master' region: the body runs only when __kmpc_master
/// returns non-zero, and is closed by __kmpc_end_master.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

/// Emits __kmpc_omp_taskyield for a 'taskyield' directive; inside an OpenMP
/// region this also emits the untied-task switch point.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits an OpenMP 'taskgroup' region bracketed by __kmpc_taskgroup /
/// __kmpc_end_taskgroup.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  // Re-type the void* element to the variable's memory type at its
  // declared alignment.
  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Builds the internal helper "…omp.copyprivate.copy_func(void *LHSArg,
/// void *RHSArg)" that copies each copyprivate variable from the RHS pointer
/// array into the LHS pointer array using the provided assignment expressions.
/// NOTE(review): the call site in emitSingleRegion passes (SrcExprs, DstExprs)
/// into the (DestExprs, SrcExprs) parameter slots — the naming looks swapped;
/// confirm against the declaration in CGOpenMPRuntime.h before relying on the
/// parameter names here.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emits an OpenMP 'single' region: the body runs under __kmpc_single /
/// __kmpc_end_single; when copyprivate variables are present, a did_it flag
/// and a generated copy function are passed to __kmpc_copyprivate so the
/// executing thread's values are broadcast to the team.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
                          createRuntimeFunction(OMPRTL__kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  // No 'threads' clause: just emit the region body without runtime calls.
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// Map a directive kind to the ident_t barrier flag passed to the runtime so
/// it can tell which construct an (implicit or explicit) barrier belongs to.
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    // Any other construct gets the generic implicit-barrier flag.
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

/// Pick a default schedule (and chunk expression) for a loop directive.
/// Only overrides the outputs for doacross loops; otherwise leaves
/// \p ScheduleKind / \p ChunkExpr untouched.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    // Synthesize an 'unsigned 1' literal as the chunk expression.
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

/// Emit a barrier for construct \p Kind. Inside a cancellable OpenMP region
/// (unless \p ForceSimpleCall) this uses __kmpc_cancel_barrier and, when
/// \p EmitChecks, branches to the cancellation exit if it returns nonzero.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        // Branch through cleanups to the directive's cancellation destination.
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
3352 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 3353 bool Chunked, bool Ordered) { 3354 switch (ScheduleKind) { 3355 case OMPC_SCHEDULE_static: 3356 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 3357 : (Ordered ? OMP_ord_static : OMP_sch_static); 3358 case OMPC_SCHEDULE_dynamic: 3359 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 3360 case OMPC_SCHEDULE_guided: 3361 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 3362 case OMPC_SCHEDULE_runtime: 3363 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 3364 case OMPC_SCHEDULE_auto: 3365 return Ordered ? OMP_ord_auto : OMP_sch_auto; 3366 case OMPC_SCHEDULE_unknown: 3367 assert(!Chunked && "chunk was specified but schedule kind not known"); 3368 return Ordered ? OMP_ord_static : OMP_sch_static; 3369 } 3370 llvm_unreachable("Unexpected runtime schedule"); 3371 } 3372 3373 /// Map the OpenMP distribute schedule to the runtime enumeration. 3374 static OpenMPSchedType 3375 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 3376 // only static is allowed for dist_schedule 3377 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

/// Return true iff the schedule clause resolves to plain non-chunked static
/// scheduling (the __kmpc_for_static_init path with default chunking).
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

/// Same check for the dist_schedule clause of 'distribute' directives.
bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

/// Return true iff the schedule clause resolves to chunked static scheduling.
bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

/// Same check for the dist_schedule clause of 'distribute' directives.
bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

/// Return true iff the schedule requires the dynamic dispatch
/// (__kmpc_dispatch_*) code path rather than the static-init path.
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// Fold the monotonic/nonmonotonic/simd schedule modifiers into the runtime
/// schedule value. If both M1 and M2 set a modifier bit, M2's wins (the second
/// switch overwrites Modifier); the 'simd' modifier upgrades chunked static to
/// the balanced-chunked schedule.
static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // The modifier occupies high bits of the schedule value, so OR them.
  return Schedule | Modifier;
}

/// Emit the dispatch-init call for a dynamically scheduled worksharing loop.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules must go through emitForStaticInit instead, unless the
  // loop is 'ordered' (ordered static also uses the dispatch machinery).
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ?
DispatchValues.Chunk
                                          : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                // Lower
      DispatchValues.UB,                                // Upper
      CGF.Builder.getIntN(IVSize, 1),                   // Stride
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// Shared helper that emits the __kmpc_for_static_init call for both loop
/// worksharing directives and 'distribute'. Only static(-like) schedules are
/// valid here; 'ordered' loops must use the dispatch path instead.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

/// Emit static initialization for a loop- or sections-based worksharing
/// directive.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  // Tag the ident_t with the worksharing construct kind for the runtime.
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    isOpenMPLoopDirective(DKind)
                                                        ? OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

/// Emit static initialization for a 'distribute' directive (dist_schedule).
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  // 'distribute' carries no schedule modifiers.
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

/// Emit the matching static-fini call at the end of a statically scheduled
/// worksharing or distribute region.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
                      Args);
}

/// Notify the runtime that one 'ordered' iteration of a dynamically
/// dispatched loop has finished.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

/// Request the next chunk of a dynamically dispatched loop. Returns an i1
/// value that is true while more work remains.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  // The runtime returns kmp_int32; convert it to a boolean value.
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

/// Lower the 'num_threads' clause to a __kmpc_push_num_threads call.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
                      Args);
}

/// Lower the 'proc_bind' clause to a __kmpc_push_proc_bind call, mapping the
/// clause kind onto the runtime's numeric proc-bind encoding.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         OpenMPProcBindClauseKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Constants for proc bind value accepted by the runtime.
  enum ProcBindTy {
    ProcBindFalse = 0,
    ProcBindTrue,
    ProcBindMaster,
    ProcBindClose,
    ProcBindSpread,
    ProcBindIntel,
    ProcBindDefault
  } RuntimeProcBind;
  switch (ProcBind) {
  case OMPC_PROC_BIND_master:
    RuntimeProcBind = ProcBindMaster;
    break;
  case OMPC_PROC_BIND_close:
    RuntimeProcBind = ProcBindClose;
    break;
  case OMPC_PROC_BIND_spread:
    RuntimeProcBind = ProcBindSpread;
    break;
  case OMPC_PROC_BIND_unknown:
    llvm_unreachable("Unsupported proc_bind value.");
  }
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}

/// Lower the 'flush' directive. The variable list is currently unused; the
/// runtime call flushes unconditionally.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call void __kmpc_flush(ident_t *loc)
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
                      emitUpdateLocation(CGF, Loc));
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

/// True when no offload entries (target regions or device globals) have been
/// registered.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
/// Device-side only: reserves a slot keyed by (DeviceID, FileID, ParentName,
/// LineNum) with null Addr/ID, to be filled in by the register step.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

/// Register the address/ID/flags of an emitted target region. On the device
/// the slot must already exist (see initializeTargetRegionEntryInfo); on the
/// host a new entry is created in emission order.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      // Host metadata promised an entry the device codegen never produced.
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host side: create the entry on first registration, in emission order.
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

/// Return true iff an entry exists for the given key AND has not yet had its
/// address/ID registered (a fully registered entry yields false).
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName,
    unsigned LineNum) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (PerLine->second.getAddress() || PerLine->second.getID())
    return false;
  return true;
}

/// Invoke \p Action on every registered target region entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

/// Device-side only: reserve a slot for a declare-target global variable.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

/// Register address/size/linkage for a declare-target global. An entry that
/// already has an address only gets its size/linkage filled in if the size was
/// previously zero (i.e. the variable was first seen as a declaration).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

/// Invoke \p Action on every registered device global variable entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

/// Build the host-side offloading binary descriptor plus the global
/// registration/unregistration functions, and return the registration
/// function (or nullptr when no descriptor is needed).
llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  llvm::Module &M = CGM.getModule();
  ASTContext &C = CGM.getContext();

  // Get list of devices we care about
  const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;

  // We should be creating an offloading descriptor only if there are devices
  // specified.
  assert(!Devices.empty() && "No OpenMP offloading devices??");

  // Create the external variables that will point to the begin and end of the
  // host entries section. These will be defined by the linker.
3859 llvm::Type *OffloadEntryTy = 3860 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); 3861 std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"}); 3862 auto *HostEntriesBegin = new llvm::GlobalVariable( 3863 M, OffloadEntryTy, /*isConstant=*/true, 3864 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 3865 EntriesBeginName); 3866 std::string EntriesEndName = getName({"omp_offloading", "entries_end"}); 3867 auto *HostEntriesEnd = 3868 new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true, 3869 llvm::GlobalValue::ExternalLinkage, 3870 /*Initializer=*/nullptr, EntriesEndName); 3871 3872 // Create all device images 3873 auto *DeviceImageTy = cast<llvm::StructType>( 3874 CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); 3875 ConstantInitBuilder DeviceImagesBuilder(CGM); 3876 ConstantArrayBuilder DeviceImagesEntries = 3877 DeviceImagesBuilder.beginArray(DeviceImageTy); 3878 3879 for (const llvm::Triple &Device : Devices) { 3880 StringRef T = Device.getTriple(); 3881 std::string BeginName = getName({"omp_offloading", "img_start", ""}); 3882 auto *ImgBegin = new llvm::GlobalVariable( 3883 M, CGM.Int8Ty, /*isConstant=*/true, 3884 llvm::GlobalValue::ExternalWeakLinkage, 3885 /*Initializer=*/nullptr, Twine(BeginName).concat(T)); 3886 std::string EndName = getName({"omp_offloading", "img_end", ""}); 3887 auto *ImgEnd = new llvm::GlobalVariable( 3888 M, CGM.Int8Ty, /*isConstant=*/true, 3889 llvm::GlobalValue::ExternalWeakLinkage, 3890 /*Initializer=*/nullptr, Twine(EndName).concat(T)); 3891 3892 llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin, 3893 HostEntriesEnd}; 3894 createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data, 3895 DeviceImagesEntries); 3896 } 3897 3898 // Create device images global array. 
3899 std::string ImagesName = getName({"omp_offloading", "device_images"}); 3900 llvm::GlobalVariable *DeviceImages = 3901 DeviceImagesEntries.finishAndCreateGlobal(ImagesName, 3902 CGM.getPointerAlign(), 3903 /*isConstant=*/true); 3904 DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3905 3906 // This is a Zero array to be used in the creation of the constant expressions 3907 llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), 3908 llvm::Constant::getNullValue(CGM.Int32Ty)}; 3909 3910 // Create the target region descriptor. 3911 llvm::Constant *Data[] = { 3912 llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()), 3913 llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), 3914 DeviceImages, Index), 3915 HostEntriesBegin, HostEntriesEnd}; 3916 std::string Descriptor = getName({"omp_offloading", "descriptor"}); 3917 llvm::GlobalVariable *Desc = createGlobalStruct( 3918 CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor); 3919 3920 // Emit code to register or unregister the descriptor at execution 3921 // startup or closing, respectively. 3922 3923 llvm::Function *UnRegFn; 3924 { 3925 FunctionArgList Args; 3926 ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); 3927 Args.push_back(&DummyPtr); 3928 3929 CodeGenFunction CGF(CGM); 3930 // Disable debug info for global (de-)initializer because they are not part 3931 // of some particular construct. 
3932 CGF.disableDebugInfo(); 3933 const auto &FI = 3934 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3935 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 3936 std::string UnregName = getName({"omp_offloading", "descriptor_unreg"}); 3937 UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI); 3938 CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args); 3939 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), 3940 Desc); 3941 CGF.FinishFunction(); 3942 } 3943 llvm::Function *RegFn; 3944 { 3945 CodeGenFunction CGF(CGM); 3946 // Disable debug info for global (de-)initializer because they are not part 3947 // of some particular construct. 3948 CGF.disableDebugInfo(); 3949 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 3950 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 3951 3952 // Encode offload target triples into the registration function name. It 3953 // will serve as a comdat key for the registration/unregistration code for 3954 // this particular combination of offloading targets. 3955 SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U); 3956 RegFnNameParts[0] = "omp_offloading"; 3957 RegFnNameParts[1] = "descriptor_reg"; 3958 llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2), 3959 [](const llvm::Triple &T) -> const std::string& { 3960 return T.getTriple(); 3961 }); 3962 llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end()); 3963 std::string Descriptor = getName(RegFnNameParts); 3964 RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI); 3965 CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList()); 3966 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); 3967 // Create a variable to drive the registration and unregistration of the 3968 // descriptor, so we can reuse the logic that emits Ctors and Dtors. 
3969 ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), 3970 SourceLocation(), nullptr, C.CharTy, 3971 ImplicitParamDecl::Other); 3972 CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); 3973 CGF.FinishFunction(); 3974 } 3975 if (CGM.supportsCOMDAT()) { 3976 // It is sufficient to call registration function only once, so create a 3977 // COMDAT group for registration/unregistration functions and associated 3978 // data. That would reduce startup time and code size. Registration 3979 // function serves as a COMDAT group key. 3980 llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName()); 3981 RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); 3982 RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); 3983 RegFn->setComdat(ComdatKey); 3984 UnRegFn->setComdat(ComdatKey); 3985 DeviceImages->setComdat(ComdatKey); 3986 Desc->setComdat(ComdatKey); 3987 } 3988 return RegFn; 3989 } 3990 3991 void CGOpenMPRuntime::createOffloadEntry( 3992 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3993 llvm::GlobalValue::LinkageTypes Linkage) { 3994 StringRef Name = Addr->getName(); 3995 llvm::Module &M = CGM.getModule(); 3996 llvm::LLVMContext &C = M.getContext(); 3997 3998 // Create constant string with the name. 
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  // Keep the entry name in an internal, unnamed_addr global so identical
  // strings can be merged by the backend.
  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Initializer for the five fields of __tgt_offload_entry (see
  // getTgtOffloadEntryQTy()): addr, name, size, flags, reserved.
  // NOTE(review): the Linkage parameter is not used in this body; the entry
  // global below is always emitted with weak_any linkage.
  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
                            llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
                            llvm::ConstantInt::get(CGM.SizeTy, Size),
                            llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                            llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  std::string Section = getName({"omp_offloading", "entries"});
  Entry->setSection(Section);
}

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we do not have entries, we don't need to do anything.
4033 if (OffloadEntriesInfoManager.empty()) 4034 return; 4035 4036 llvm::Module &M = CGM.getModule(); 4037 llvm::LLVMContext &C = M.getContext(); 4038 SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> 4039 OrderedEntries(OffloadEntriesInfoManager.size()); 4040 llvm::SmallVector<StringRef, 16> ParentFunctions( 4041 OffloadEntriesInfoManager.size()); 4042 4043 // Auxiliary methods to create metadata values and strings. 4044 auto &&GetMDInt = [this](unsigned V) { 4045 return llvm::ConstantAsMetadata::get( 4046 llvm::ConstantInt::get(CGM.Int32Ty, V)); 4047 }; 4048 4049 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 4050 4051 // Create the offloading info metadata node. 4052 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 4053 4054 // Create function that emits metadata for each target region entry; 4055 auto &&TargetRegionMetadataEmitter = 4056 [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString]( 4057 unsigned DeviceID, unsigned FileID, StringRef ParentName, 4058 unsigned Line, 4059 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 4060 // Generate metadata for target regions. Each entry of this metadata 4061 // contains: 4062 // - Entry 0 -> Kind of this type of metadata (0). 4063 // - Entry 1 -> Device ID of the file where the entry was identified. 4064 // - Entry 2 -> File ID of the file where the entry was identified. 4065 // - Entry 3 -> Mangled name of the function where the entry was 4066 // identified. 4067 // - Entry 4 -> Line in the file where the entry was identified. 4068 // - Entry 5 -> Order the entry was created. 4069 // The first element of the metadata node is the kind. 4070 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 4071 GetMDInt(FileID), GetMDString(ParentName), 4072 GetMDInt(Line), GetMDInt(E.getOrder())}; 4073 4074 // Save this entry in the right position of the ordered entries array. 
4075 OrderedEntries[E.getOrder()] = &E; 4076 ParentFunctions[E.getOrder()] = ParentName; 4077 4078 // Add metadata to the named metadata node. 4079 MD->addOperand(llvm::MDNode::get(C, Ops)); 4080 }; 4081 4082 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 4083 TargetRegionMetadataEmitter); 4084 4085 // Create function that emits metadata for each device global variable entry; 4086 auto &&DeviceGlobalVarMetadataEmitter = 4087 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 4088 MD](StringRef MangledName, 4089 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 4090 &E) { 4091 // Generate metadata for global variables. Each entry of this metadata 4092 // contains: 4093 // - Entry 0 -> Kind of this type of metadata (1). 4094 // - Entry 1 -> Mangled name of the variable. 4095 // - Entry 2 -> Declare target kind. 4096 // - Entry 3 -> Order the entry was created. 4097 // The first element of the metadata node is the kind. 4098 llvm::Metadata *Ops[] = { 4099 GetMDInt(E.getKind()), GetMDString(MangledName), 4100 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 4101 4102 // Save this entry in the right position of the ordered entries array. 4103 OrderedEntries[E.getOrder()] = &E; 4104 4105 // Add metadata to the named metadata node. 4106 MD->addOperand(llvm::MDNode::get(C, Ops)); 4107 }; 4108 4109 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 4110 DeviceGlobalVarMetadataEmitter); 4111 4112 for (const auto *E : OrderedEntries) { 4113 assert(E && "All ordered entries must exist!"); 4114 if (const auto *CE = 4115 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 4116 E)) { 4117 if (!CE->getID() || !CE->getAddress()) { 4118 // Do not blame the entry if the parent funtion is not emitted. 
4119 StringRef FnName = ParentFunctions[CE->getOrder()]; 4120 if (!CGM.GetGlobalValue(FnName)) 4121 continue; 4122 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4123 DiagnosticsEngine::Error, 4124 "Offloading entry for target region is incorrect: either the " 4125 "address or the ID is invalid."); 4126 CGM.getDiags().Report(DiagID); 4127 continue; 4128 } 4129 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 4130 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 4131 } else if (const auto *CE = 4132 dyn_cast<OffloadEntriesInfoManagerTy:: 4133 OffloadEntryInfoDeviceGlobalVar>(E)) { 4134 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 4135 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4136 CE->getFlags()); 4137 switch (Flags) { 4138 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 4139 if (!CE->getAddress()) { 4140 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4141 DiagnosticsEngine::Error, 4142 "Offloading entry for declare target variable is incorrect: the " 4143 "address is invalid."); 4144 CGM.getDiags().Report(DiagID); 4145 continue; 4146 } 4147 // The vaiable has no definition - no need to add the entry. 
4148 if (CE->getVarSize().isZero()) 4149 continue; 4150 break; 4151 } 4152 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 4153 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 4154 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 4155 "Declaret target link address is set."); 4156 if (CGM.getLangOpts().OpenMPIsDevice) 4157 continue; 4158 if (!CE->getAddress()) { 4159 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4160 DiagnosticsEngine::Error, 4161 "Offloading entry for declare target variable is incorrect: the " 4162 "address is invalid."); 4163 CGM.getDiags().Report(DiagID); 4164 continue; 4165 } 4166 break; 4167 } 4168 createOffloadEntry(CE->getAddress(), CE->getAddress(), 4169 CE->getVarSize().getQuantity(), Flags, 4170 CE->getLinkage()); 4171 } else { 4172 llvm_unreachable("Unsupported entry kind."); 4173 } 4174 } 4175 } 4176 4177 /// Loads all the offload entries information from the host IR 4178 /// metadata. 4179 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 4180 // If we are in target mode, load the metadata from the host IR. This code has 4181 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 

  // Host compilation does not consume host metadata.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throwaway context; we only read its named
  // metadata.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to read the operands produced by
    // createOffloadEntriesAndInfoMetadata().
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operand layout depends on it.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

/// Lazily build the type of the task routine entry point:
/// typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void      *addr;       // Pointer to the offload entry info.
  //                          // (function or global)
  //   char      *name;       // Name of the function or global.
  //   size_t     size;       // Size of the entry info (0 if it a function).
  //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
  //   int32_t    reserved;   // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    // The runtime expects this struct without padding.
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
  // These are the types we need to build:
  // struct __tgt_device_image{
  //   void   *ImageStart;       // Pointer to the target code start.
  //   void   *ImageEnd;         // Pointer to the target code end.
  //   // We also add the host entries to the device image, as it may be useful
  //   // for the target runtime to have access to that information.
  //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
  //                                         // the entries.
  //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
  //                                         // entries (non inclusive).
  // };
  if (TgtDeviceImageQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtDeviceImageQTy = C.getRecordType(RD);
  }
  return TgtDeviceImageQTy;
}

QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
  // struct __tgt_bin_desc{
  //   int32_t              NumDevices;      // Number of devices supported.
  //   __tgt_device_image   *DeviceImages;   // Arrays of device images
  //                                         // (one per device).
  //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
  //                                         // entries.
  //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
  //                                         // entries (non inclusive).
  // };
  if (TgtBinaryDescriptorQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
    RD->startDefinition();
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtBinaryDescriptorQTy = C.getRecordType(RD);
  }
  return TgtBinaryDescriptorQTy;
}

namespace {
/// Helper tying together a captured variable with its task-private copy and
/// (for firstprivate) the variable used to initialize each element.
struct PrivateHelpersTy {
  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
                   const VarDecl *PrivateElemInit)
      : Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  const VarDecl *Original;
  const VarDecl *PrivateCopy;
  const VarDecl *PrivateElemInit;
};
// Pairs the required alignment with the private variable description.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        // Propagate alignment attributes so the field keeps the variable's
        // required alignment.
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  // No private variables - no record is needed.
  return nullptr;
}

static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t     data1;
  //         kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloop tasks additionally carry bounds, stride, last-iteration flag
    // and a reductions pointer.
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  // The privates field is only present when there are private variables.
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // The proxy signature required by the runtime: (gtid, kmp_task_t *).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // First field of kmp_task_t_with_privates is the kmp_task_t task_data.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer();

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field (index 1) is only present when the task has privates;
  // otherwise pass a null pointer.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass lb, ub, st, liter and reductions loaded
    // from the task descriptor.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The runtime entry always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emit the task destructor thunk: it walks the fields of the privates
/// record and pushes a destroy cleanup for each field that has a non-trivial
/// destruction kind.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Second field of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates.
*noalias privs, <ty1> 4608 /// **noalias priv1,..., <tyn> **noalias privn) { 4609 /// *priv1 = &.privates.priv1; 4610 /// ...; 4611 /// *privn = &.privates.privn; 4612 /// } 4613 /// \endcode 4614 static llvm::Value * 4615 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 4616 ArrayRef<const Expr *> PrivateVars, 4617 ArrayRef<const Expr *> FirstprivateVars, 4618 ArrayRef<const Expr *> LastprivateVars, 4619 QualType PrivatesQTy, 4620 ArrayRef<PrivateDataTy> Privates) { 4621 ASTContext &C = CGM.getContext(); 4622 FunctionArgList Args; 4623 ImplicitParamDecl TaskPrivatesArg( 4624 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4625 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 4626 ImplicitParamDecl::Other); 4627 Args.push_back(&TaskPrivatesArg); 4628 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 4629 unsigned Counter = 1; 4630 for (const Expr *E : PrivateVars) { 4631 Args.push_back(ImplicitParamDecl::Create( 4632 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4633 C.getPointerType(C.getPointerType(E->getType())) 4634 .withConst() 4635 .withRestrict(), 4636 ImplicitParamDecl::Other)); 4637 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4638 PrivateVarsPos[VD] = Counter; 4639 ++Counter; 4640 } 4641 for (const Expr *E : FirstprivateVars) { 4642 Args.push_back(ImplicitParamDecl::Create( 4643 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4644 C.getPointerType(C.getPointerType(E->getType())) 4645 .withConst() 4646 .withRestrict(), 4647 ImplicitParamDecl::Other)); 4648 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4649 PrivateVarsPos[VD] = Counter; 4650 ++Counter; 4651 } 4652 for (const Expr *E : LastprivateVars) { 4653 Args.push_back(ImplicitParamDecl::Create( 4654 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4655 C.getPointerType(C.getPointerType(E->getType())) 4656 .withConst() 4657 .withRestrict(), 4658 ImplicitParamDecl::Other)); 4659 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4660 
PrivateVarsPos[VD] = Counter; 4661 ++Counter; 4662 } 4663 const auto &TaskPrivatesMapFnInfo = 4664 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4665 llvm::FunctionType *TaskPrivatesMapTy = 4666 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 4667 std::string Name = 4668 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 4669 auto *TaskPrivatesMap = llvm::Function::Create( 4670 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 4671 &CGM.getModule()); 4672 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 4673 TaskPrivatesMapFnInfo); 4674 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 4675 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 4676 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 4677 CodeGenFunction CGF(CGM); 4678 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 4679 TaskPrivatesMapFnInfo, Args, Loc, Loc); 4680 4681 // *privi = &.privates.privi; 4682 LValue Base = CGF.EmitLoadOfPointerLValue( 4683 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 4684 TaskPrivatesArg.getType()->castAs<PointerType>()); 4685 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 4686 Counter = 0; 4687 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 4688 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 4689 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4690 LValue RefLVal = 4691 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4692 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4693 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 4694 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 4695 ++Counter; 4696 } 4697 CGF.FinishFunction(); 4698 return TaskPrivatesMap; 4699 } 4700 4701 static bool stable_sort_comparator(const PrivateDataTy P1, 4702 const PrivateDataTy P2) { 4703 return P1.first > P2.first; 4704 } 4705 4706 /// Emit initialization for private variables in 
/// task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Second field of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the fields of the privates record in lockstep with Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // When emitting the task_dup function (ForDup) only non-trivial
    // constructor initialization needs to be re-run.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          // Re-derive the lvalue with the alignment of the original
          // declaration.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/record firstprivate: privatize the element-init variable
          // to the shared location and run the initializer.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private variable: just run its initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
4812 static bool checkInitIsRequired(CodeGenFunction &CGF, 4813 ArrayRef<PrivateDataTy> Privates) { 4814 bool InitRequired = false; 4815 for (const PrivateDataTy &Pair : Privates) { 4816 const VarDecl *VD = Pair.second.PrivateCopy; 4817 const Expr *Init = VD->getAnyInitializer(); 4818 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4819 !CGF.isTrivialInitializer(Init)); 4820 if (InitRequired) 4821 break; 4822 } 4823 return InitRequired; 4824 } 4825 4826 4827 /// Emit task_dup function (for initialization of 4828 /// private/firstprivate/lastprivate vars and last_iter flag) 4829 /// \code 4830 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4831 /// lastpriv) { 4832 /// // setup lastprivate flag 4833 /// task_dst->last = lastpriv; 4834 /// // could be constructor calls here... 4835 /// } 4836 /// \endcode 4837 static llvm::Value * 4838 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4839 const OMPExecutableDirective &D, 4840 QualType KmpTaskTWithPrivatesPtrQTy, 4841 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4842 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4843 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4844 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4845 ASTContext &C = CGM.getContext(); 4846 FunctionArgList Args; 4847 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4848 KmpTaskTWithPrivatesPtrQTy, 4849 ImplicitParamDecl::Other); 4850 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4851 KmpTaskTWithPrivatesPtrQTy, 4852 ImplicitParamDecl::Other); 4853 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4854 ImplicitParamDecl::Other); 4855 Args.push_back(&DstArg); 4856 Args.push_back(&SrcArg); 4857 Args.push_back(&LastprivArg); 4858 const auto &TaskDupFnInfo = 4859 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4860 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 
4861 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4862 auto *TaskDup = llvm::Function::Create( 4863 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4864 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4865 TaskDup->setDoesNotRecurse(); 4866 CodeGenFunction CGF(CGM); 4867 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4868 Loc); 4869 4870 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4871 CGF.GetAddrOfLocalVar(&DstArg), 4872 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4873 // task_dst->liter = lastpriv; 4874 if (WithLastIter) { 4875 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4876 LValue Base = CGF.EmitLValueForField( 4877 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4878 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4879 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4880 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4881 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4882 } 4883 4884 // Emit initial values for private copies (if any). 4885 assert(!Privates.empty()); 4886 Address KmpTaskSharedsPtr = Address::invalid(); 4887 if (!Data.FirstprivateVars.empty()) { 4888 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4889 CGF.GetAddrOfLocalVar(&SrcArg), 4890 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4891 LValue Base = CGF.EmitLValueForField( 4892 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4893 KmpTaskSharedsPtr = Address( 4894 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4895 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4896 KmpTaskTShareds)), 4897 Loc), 4898 CGF.getNaturalTypeAlignment(SharedsTy)); 4899 } 4900 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4901 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4902 CGF.FinishFunction(); 4903 return TaskDup; 4904 } 4905 4906 /// Checks if destructor function is required to be generated. 
4907 /// \return true if cleanups are required, false otherwise. 4908 static bool 4909 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 4910 bool NeedsCleanup = false; 4911 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4912 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 4913 for (const FieldDecl *FD : PrivateRD->fields()) { 4914 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 4915 if (NeedsCleanup) 4916 break; 4917 } 4918 return NeedsCleanup; 4919 } 4920 4921 CGOpenMPRuntime::TaskResultTy 4922 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4923 const OMPExecutableDirective &D, 4924 llvm::Function *TaskFunction, QualType SharedsTy, 4925 Address Shareds, const OMPTaskDataTy &Data) { 4926 ASTContext &C = CGM.getContext(); 4927 llvm::SmallVector<PrivateDataTy, 4> Privates; 4928 // Aggregate privates and sort them by the alignment. 4929 auto I = Data.PrivateCopies.begin(); 4930 for (const Expr *E : Data.PrivateVars) { 4931 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4932 Privates.emplace_back( 4933 C.getDeclAlign(VD), 4934 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4935 /*PrivateElemInit=*/nullptr)); 4936 ++I; 4937 } 4938 I = Data.FirstprivateCopies.begin(); 4939 auto IElemInitRef = Data.FirstprivateInits.begin(); 4940 for (const Expr *E : Data.FirstprivateVars) { 4941 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4942 Privates.emplace_back( 4943 C.getDeclAlign(VD), 4944 PrivateHelpersTy( 4945 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4946 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4947 ++I; 4948 ++IElemInitRef; 4949 } 4950 I = Data.LastprivateCopies.begin(); 4951 for (const Expr *E : Data.LastprivateVars) { 4952 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4953 Privates.emplace_back( 4954 C.getDeclAlign(VD), 4955 PrivateHelpersTy(VD, 
cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4956 /*PrivateElemInit=*/nullptr)); 4957 ++I; 4958 } 4959 std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator); 4960 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4961 // Build type kmp_routine_entry_t (if not built yet). 4962 emitKmpRoutineEntryT(KmpInt32Ty); 4963 // Build type kmp_task_t (if not built yet). 4964 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4965 if (SavedKmpTaskloopTQTy.isNull()) { 4966 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4967 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4968 } 4969 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4970 } else { 4971 assert((D.getDirectiveKind() == OMPD_task || 4972 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4973 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4974 "Expected taskloop, task or target directive"); 4975 if (SavedKmpTaskTQTy.isNull()) { 4976 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4977 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4978 } 4979 KmpTaskTQTy = SavedKmpTaskTQTy; 4980 } 4981 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4982 // Build particular struct kmp_task_t for the given task. 
4983 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4984 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4985 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4986 QualType KmpTaskTWithPrivatesPtrQTy = 4987 C.getPointerType(KmpTaskTWithPrivatesQTy); 4988 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4989 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4990 KmpTaskTWithPrivatesTy->getPointerTo(); 4991 llvm::Value *KmpTaskTWithPrivatesTySize = 4992 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4993 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4994 4995 // Emit initial values for private copies (if any). 4996 llvm::Value *TaskPrivatesMap = nullptr; 4997 llvm::Type *TaskPrivatesMapTy = 4998 std::next(TaskFunction->arg_begin(), 3)->getType(); 4999 if (!Privates.empty()) { 5000 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 5001 TaskPrivatesMap = emitTaskPrivateMappingFunction( 5002 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 5003 FI->getType(), Privates); 5004 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5005 TaskPrivatesMap, TaskPrivatesMapTy); 5006 } else { 5007 TaskPrivatesMap = llvm::ConstantPointerNull::get( 5008 cast<llvm::PointerType>(TaskPrivatesMapTy)); 5009 } 5010 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 5011 // kmp_task_t *tt); 5012 llvm::Function *TaskEntry = emitProxyTaskFunction( 5013 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5014 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 5015 TaskPrivatesMap); 5016 5017 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 5018 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 5019 // kmp_routine_entry_t *task_entry); 5020 // Task flags. 
Format is taken from 5021 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 5022 // description of kmp_tasking_flags struct. 5023 enum { 5024 TiedFlag = 0x1, 5025 FinalFlag = 0x2, 5026 DestructorsFlag = 0x8, 5027 PriorityFlag = 0x20 5028 }; 5029 unsigned Flags = Data.Tied ? TiedFlag : 0; 5030 bool NeedsCleanup = false; 5031 if (!Privates.empty()) { 5032 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 5033 if (NeedsCleanup) 5034 Flags = Flags | DestructorsFlag; 5035 } 5036 if (Data.Priority.getInt()) 5037 Flags = Flags | PriorityFlag; 5038 llvm::Value *TaskFlags = 5039 Data.Final.getPointer() 5040 ? CGF.Builder.CreateSelect(Data.Final.getPointer(), 5041 CGF.Builder.getInt32(FinalFlag), 5042 CGF.Builder.getInt32(/*C=*/0)) 5043 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 5044 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 5045 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 5046 llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), 5047 getThreadID(CGF, Loc), TaskFlags, 5048 KmpTaskTWithPrivatesTySize, SharedsSize, 5049 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5050 TaskEntry, KmpRoutineEntryPtrTy)}; 5051 llvm::Value *NewTask = CGF.EmitRuntimeCall( 5052 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 5053 llvm::Value *NewTaskNewTaskTTy = 5054 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5055 NewTask, KmpTaskTWithPrivatesPtrTy); 5056 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 5057 KmpTaskTWithPrivatesQTy); 5058 LValue TDBase = 5059 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 5060 // Fill the data in the resulting kmp_task_t record. 5061 // Copy shareds if there are any. 
5062 Address KmpTaskSharedsPtr = Address::invalid(); 5063 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 5064 KmpTaskSharedsPtr = 5065 Address(CGF.EmitLoadOfScalar( 5066 CGF.EmitLValueForField( 5067 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 5068 KmpTaskTShareds)), 5069 Loc), 5070 CGF.getNaturalTypeAlignment(SharedsTy)); 5071 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 5072 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 5073 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 5074 } 5075 // Emit initial values for private copies (if any). 5076 TaskResultTy Result; 5077 if (!Privates.empty()) { 5078 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 5079 SharedsTy, SharedsPtrTy, Data, Privates, 5080 /*ForDup=*/false); 5081 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 5082 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 5083 Result.TaskDupFn = emitTaskDupFunction( 5084 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 5085 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 5086 /*WithLastIter=*/!Data.LastprivateVars.empty()); 5087 } 5088 } 5089 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 5090 enum { Priority = 0, Destructors = 1 }; 5091 // Provide pointer to function with destructors for privates. 
5092 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 5093 const RecordDecl *KmpCmplrdataUD = 5094 (*FI)->getType()->getAsUnionType()->getDecl(); 5095 if (NeedsCleanup) { 5096 llvm::Value *DestructorFn = emitDestructorsFunction( 5097 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5098 KmpTaskTWithPrivatesQTy); 5099 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 5100 LValue DestructorsLV = CGF.EmitLValueForField( 5101 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 5102 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5103 DestructorFn, KmpRoutineEntryPtrTy), 5104 DestructorsLV); 5105 } 5106 // Set priority. 5107 if (Data.Priority.getInt()) { 5108 LValue Data2LV = CGF.EmitLValueForField( 5109 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 5110 LValue PriorityLV = CGF.EmitLValueForField( 5111 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 5112 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 5113 } 5114 Result.NewTask = NewTask; 5115 Result.TaskEntry = TaskEntry; 5116 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 5117 Result.TDBase = TDBase; 5118 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 5119 return Result; 5120 } 5121 5122 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5123 const OMPExecutableDirective &D, 5124 llvm::Function *TaskFunction, 5125 QualType SharedsTy, Address Shareds, 5126 const Expr *IfCond, 5127 const OMPTaskDataTy &Data) { 5128 if (!CGF.HaveInsertPoint()) 5129 return; 5130 5131 TaskResultTy Result = 5132 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5133 llvm::Value *NewTask = Result.NewTask; 5134 llvm::Function *TaskEntry = Result.TaskEntry; 5135 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5136 LValue TDBase = Result.TDBase; 5137 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5138 ASTContext &C = CGM.getContext(); 5139 // Process list of dependences. 
5140 Address DependenciesArray = Address::invalid(); 5141 unsigned NumDependencies = Data.Dependences.size(); 5142 if (NumDependencies) { 5143 // Dependence kind for RTL. 5144 enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 }; 5145 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 5146 RecordDecl *KmpDependInfoRD; 5147 QualType FlagsTy = 5148 C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 5149 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5150 if (KmpDependInfoTy.isNull()) { 5151 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 5152 KmpDependInfoRD->startDefinition(); 5153 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 5154 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 5155 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 5156 KmpDependInfoRD->completeDefinition(); 5157 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 5158 } else { 5159 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5160 } 5161 // Define type kmp_depend_info[<Dependences.size()>]; 5162 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 5163 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), 5164 ArrayType::Normal, /*IndexTypeQuals=*/0); 5165 // kmp_depend_info[<Dependences.size()>] deps; 5166 DependenciesArray = 5167 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 5168 for (unsigned I = 0; I < NumDependencies; ++I) { 5169 const Expr *E = Data.Dependences[I].second; 5170 LValue Addr = CGF.EmitLValue(E); 5171 llvm::Value *Size; 5172 QualType Ty = E->getType(); 5173 if (const auto *ASE = 5174 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 5175 LValue UpAddrLVal = 5176 CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false); 5177 llvm::Value *UpAddr = 5178 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); 5179 llvm::Value *LowIntPtr = 5180 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); 5181 llvm::Value 
*UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 5182 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 5183 } else { 5184 Size = CGF.getTypeSize(Ty); 5185 } 5186 LValue Base = CGF.MakeAddrLValue( 5187 CGF.Builder.CreateConstArrayGEP(DependenciesArray, I), 5188 KmpDependInfoTy); 5189 // deps[i].base_addr = &<Dependences[i].second>; 5190 LValue BaseAddrLVal = CGF.EmitLValueForField( 5191 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5192 CGF.EmitStoreOfScalar( 5193 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), 5194 BaseAddrLVal); 5195 // deps[i].len = sizeof(<Dependences[i].second>); 5196 LValue LenLVal = CGF.EmitLValueForField( 5197 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 5198 CGF.EmitStoreOfScalar(Size, LenLVal); 5199 // deps[i].flags = <Dependences[i].first>; 5200 RTLDependenceKindTy DepKind; 5201 switch (Data.Dependences[I].first) { 5202 case OMPC_DEPEND_in: 5203 DepKind = DepIn; 5204 break; 5205 // Out and InOut dependencies must use the same code. 5206 case OMPC_DEPEND_out: 5207 case OMPC_DEPEND_inout: 5208 DepKind = DepInOut; 5209 break; 5210 case OMPC_DEPEND_mutexinoutset: 5211 DepKind = DepMutexInOutSet; 5212 break; 5213 case OMPC_DEPEND_source: 5214 case OMPC_DEPEND_sink: 5215 case OMPC_DEPEND_unknown: 5216 llvm_unreachable("Unknown task dependence type"); 5217 } 5218 LValue FlagsLVal = CGF.EmitLValueForField( 5219 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5220 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5221 FlagsLVal); 5222 } 5223 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5224 CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy); 5225 } 5226 5227 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5228 // libcall. 
5229 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5230 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5231 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5232 // list is not empty 5233 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5234 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5235 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5236 llvm::Value *DepTaskArgs[7]; 5237 if (NumDependencies) { 5238 DepTaskArgs[0] = UpLoc; 5239 DepTaskArgs[1] = ThreadID; 5240 DepTaskArgs[2] = NewTask; 5241 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); 5242 DepTaskArgs[4] = DependenciesArray.getPointer(); 5243 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5244 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5245 } 5246 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies, 5247 &TaskArgs, 5248 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5249 if (!Data.Tied) { 5250 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5251 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5252 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5253 } 5254 if (NumDependencies) { 5255 CGF.EmitRuntimeCall( 5256 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); 5257 } else { 5258 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 5259 TaskArgs); 5260 } 5261 // Check if parent region is untied and build return for untied task; 5262 if (auto *Region = 5263 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5264 Region->emitUntiedSwitch(CGF); 5265 }; 5266 5267 llvm::Value *DepWaitTaskArgs[6]; 5268 if (NumDependencies) { 5269 DepWaitTaskArgs[0] = UpLoc; 5270 DepWaitTaskArgs[1] = ThreadID; 5271 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); 5272 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5273 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5274 
DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5275 } 5276 auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 5277 NumDependencies, &DepWaitTaskArgs, 5278 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5279 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5280 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5281 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5282 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5283 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5284 // is specified. 5285 if (NumDependencies) 5286 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), 5287 DepWaitTaskArgs); 5288 // Call proxy_task_entry(gtid, new_task); 5289 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5290 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5291 Action.Enter(CGF); 5292 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5293 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5294 OutlinedFnArgs); 5295 }; 5296 5297 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5298 // kmp_task_t *new_task); 5299 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5300 // kmp_task_t *new_task); 5301 RegionCodeGenTy RCG(CodeGen); 5302 CommonActionTy Action( 5303 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, 5304 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); 5305 RCG.setAction(Action); 5306 RCG(CGF); 5307 }; 5308 5309 if (IfCond) { 5310 emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5311 } else { 5312 RegionCodeGenTy ThenRCG(ThenCodeGen); 5313 ThenRCG(CGF); 5314 } 5315 } 5316 5317 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5318 const OMPLoopDirective &D, 5319 llvm::Function *TaskFunction, 5320 QualType SharedsTy, Address Shareds, 5321 const Expr *IfCond, 5322 const OMPTaskDataTy &Data) 
{
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lb, ub and st fields of the generated kmp_task_t from the
  // directive's loop-bound helper variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
5407 static void EmitOMPAggregateReduction( 5408 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5409 const VarDecl *RHSVar, 5410 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5411 const Expr *, const Expr *)> &RedOpGen, 5412 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5413 const Expr *UpExpr = nullptr) { 5414 // Perform element-by-element initialization. 5415 QualType ElementTy; 5416 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5417 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5418 5419 // Drill down to the base element type on both arrays. 5420 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5421 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5422 5423 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5424 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5425 // Cast from pointer to array type to pointer to single element. 5426 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5427 // The basic structure here is a while-do loop. 5428 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5429 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5430 llvm::Value *IsEmpty = 5431 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5432 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5433 5434 // Enter the loop body, making that address the current address. 
5435 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5436 CGF.EmitBlock(BodyBB); 5437 5438 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5439 5440 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5441 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5442 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5443 Address RHSElementCurrent = 5444 Address(RHSElementPHI, 5445 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5446 5447 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5448 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5449 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5450 Address LHSElementCurrent = 5451 Address(LHSElementPHI, 5452 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5453 5454 // Emit copy. 5455 CodeGenFunction::OMPPrivateScope Scope(CGF); 5456 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5457 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5458 Scope.Privatize(); 5459 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5460 Scope.ForceCleanup(); 5461 5462 // Shift the address forward by one element. 5463 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5464 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5465 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5466 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5467 // Check whether we've reached the end. 5468 llvm::Value *Done = 5469 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5470 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5471 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5472 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5473 5474 // Done. 5475 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5476 } 5477 5478 /// Emit reduction combiner. 
/// If the combiner is a simple expression emit it as is, otherwise consider it
/// as combiner of UDR decl and emit it as a call of UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // A user-defined reduction appears as a CallExpr whose callee is an
  // OpaqueValueExpr ultimately referring to an OMPDeclareReductionDecl. In
  // that case remap the opaque callee to the emitted UDR combiner function
  // and emit the call through the mapping.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  // Simple (non-UDR) combiner expression: emit it directly for its effects.
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emits the outlined reduction function
///   void reduction_func(void *LHSArg, void *RHSArg);
/// which casts both arguments to arrays of item pointers (type \p ArgsType),
/// privatizes the LHS/RHS variables to point into those arrays, and applies
/// each combiner in \p ReductionOps element-wise.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  // Idx tracks the slot in the argument arrays; a variably-modified item
  // consumes an extra slot holding the array size (see below), so Idx can
  // run ahead of the loop counter I.
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      // The VLA size expression is opaque here; bind it to the size value
      // smuggled through the argument array (stored as a pointer).
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits one reduction combination: an element-wise aggregate reduction loop
/// if \p PrivateRef has array type, otherwise the single combiner expression
/// \p ReductionOp.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

/// Emits the full runtime-assisted reduction: either a direct combination
/// (SimpleReduction) or the __kmpc_reduce{_nowait} protocol with a
/// reduce_func, a critical-name lock, and a switch over the runtime's
/// chosen reduction method (see the expansion sketch below).
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination needed: just combine each item in place.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      // The size is stored as a pointer-sized payload in the void* slot;
      // emitReductionFunction reads it back with ptrtoint.
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The end-reduce call runs as the region's exit action so it is emitted on
  // every path out of the combination code.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Pattern-match '<x> = <x> op <e>' style combiners so they can be
      // lowered to a simple atomic update.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Materialize the loaded x value into a temporary that
                // shadows the LHS variable while emitting the update expr.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return Out.str();
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No custom initializer: pass a null shared address.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr if the reduction item needs no cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction();
  return Fn;
}

/// Builds an array of kmp_task_red_input_t descriptors (one per reduction
/// item, with shared address, size, init/fini/comb routines and flags) and
/// emits the call to __kmpc_task_reduction_init, returning its result (the
/// taskgroup reduction descriptor) or nullptr if there is nothing to emit.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // A null fini pointer tells the runtime no finalization is needed.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}

/// Stores the dynamic size and/or the original shared address of reduction
/// item \p N into artificial threadprivate variables, so the delayed
/// init/comb/fini routines emitted above can retrieve them at run time.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
  // Store address of the original reduction item if custom initializer is used.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().VoidPtrTy,
        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
        SharedAddr, /*IsVolatile=*/false);
  }
}

/// Returns the address of the task-private copy of a reduction item by
/// calling __kmpc_task_reduction_get_th_data, using the shared item's
/// alignment for the result address.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      ReductionsPtr,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
                                                      CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
      SharedLVal.getAlignment());
}

/// Emits a call to __kmpc_omp_taskwait and, if inside an OpenMP region,
/// the untied-task switch for that region.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Ignore return result until untied tasks are supported.
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits \p CodeGen inline (no outlining) inside an InlinedOpenMPRegionRAII
/// for directive kind \p InnerKind.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Cancellation kind constants passed to the __kmpc_cancel* entry points.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps an OpenMP cancel-region directive kind to the runtime's
/// RTCancelKind constant; asserts on unsupported kinds.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

/// Emits a __kmpc_cancellationpoint call followed by a branch to the
/// region's cancel destination when the runtime reports a pending cancel.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

/// Emits a __kmpc_cancel call (optionally guarded by \p IfCond) followed by
/// a branch to the region's cancel destination when cancellation fires.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // With an if-clause the cancel is conditional; the else-branch is empty.
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

/// Emits the outlined function for a 'target' directive; delegates to
/// emitTargetOutlinedFunctionHelper.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region.
The name will be something like: 6417 // 6418 // __omp_offloading_DD_FFFF_PP_lBB 6419 // 6420 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6421 // mangled name of the function that encloses the target region and BB is the 6422 // line number of the target region. 6423 6424 unsigned DeviceID; 6425 unsigned FileID; 6426 unsigned Line; 6427 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6428 Line); 6429 SmallString<64> EntryFnName; 6430 { 6431 llvm::raw_svector_ostream OS(EntryFnName); 6432 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6433 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6434 } 6435 6436 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6437 6438 CodeGenFunction CGF(CGM, true); 6439 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6440 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6441 6442 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); 6443 6444 // If this target outline function is not an offload entry, we don't need to 6445 // register it. 6446 if (!IsOffloadEntry) 6447 return; 6448 6449 // The target region ID is used by the runtime library to identify the current 6450 // target region, so it only has to be unique and not necessarily point to 6451 // anything. It could be the pointer to the outlined function that implements 6452 // the target region, but we aren't using that so that the compiler doesn't 6453 // need to keep that, and could therefore inline the host function if proven 6454 // worthwhile during optimization. In the other hand, if emitting code for the 6455 // device, the ID has to be the function address so that it can retrieved from 6456 // the offloading entry and launched by the runtime library. We also mark the 6457 // outlined function to have external linkage in case we are emitting code for 6458 // the device, because these functions will be entry points to the device. 
6459 6460 if (CGM.getLangOpts().OpenMPIsDevice) { 6461 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6462 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6463 OutlinedFn->setDSOLocal(false); 6464 } else { 6465 std::string Name = getName({EntryFnName, "region_id"}); 6466 OutlinedFnID = new llvm::GlobalVariable( 6467 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6468 llvm::GlobalValue::WeakAnyLinkage, 6469 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6470 } 6471 6472 // Register the information for the entry associated with this target region. 6473 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6474 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6475 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6476 } 6477 6478 /// Checks if the expression is constant or does not have non-trivial function 6479 /// calls. 6480 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6481 // We can skip constant expressions. 6482 // We can skip expressions with trivial calls or simple expressions. 6483 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6484 !E->hasNonTrivialCall(Ctx)) && 6485 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6486 } 6487 6488 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6489 const Stmt *Body) { 6490 const Stmt *Child = Body->IgnoreContainers(); 6491 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6492 Child = nullptr; 6493 for (const Stmt *S : C->body()) { 6494 if (const auto *E = dyn_cast<Expr>(S)) { 6495 if (isTrivial(Ctx, E)) 6496 continue; 6497 } 6498 // Some of the statements can be ignored. 6499 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6500 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6501 continue; 6502 // Analyze declarations. 
6503 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6504 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6505 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6506 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6507 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6508 isa<UsingDirectiveDecl>(D) || 6509 isa<OMPDeclareReductionDecl>(D) || 6510 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6511 return true; 6512 const auto *VD = dyn_cast<VarDecl>(D); 6513 if (!VD) 6514 return false; 6515 return VD->isConstexpr() || 6516 ((VD->getType().isTrivialType(Ctx) || 6517 VD->getType()->isReferenceType()) && 6518 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6519 })) 6520 continue; 6521 } 6522 // Found multiple children - cannot get the one child only. 6523 if (Child) 6524 return nullptr; 6525 Child = S; 6526 } 6527 if (Child) 6528 Child = Child->IgnoreContainers(); 6529 } 6530 return Child; 6531 } 6532 6533 /// Emit the number of teams for a target directive. Inspect the num_teams 6534 /// clause associated with a teams construct combined or closely nested 6535 /// with the target directive. 6536 /// 6537 /// Emit a team of size one for directives such as 'target parallel' that 6538 /// have no associated teams construct. 6539 /// 6540 /// Otherwise, return nullptr. 
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // A plain 'target' may have a nested teams/parallel/simd directive;
    // inspect the single interesting child of the captured body.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Evaluate the nested num_teams expression in the context of the
          // enclosing captured statement.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*IsSigned=*/true);
        }
        // Nested teams without num_teams: let the runtime choose (0).
        return Bld.getInt32(0);
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested directive could be identified.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*IsSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // Combined target constructs with no teams part always use one team.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

/// Compute the number-of-threads value implied by a directive nested inside a
/// target region: inspects a nested parallel directive's if/num_threads
/// clauses, clamping num_threads by \p DefaultThreadLimitVal when provided.
/// Returns an i32 value, or \p DefaultThreadLimitVal (possibly null) if no
/// parallel directive is found.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Only an 'if' clause without a name modifier, or one naming
        // 'parallel', applies here.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: serial execution, one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any pre-init declarations the clause captured.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*IsSigned=*/false);
        // Clamp num_threads to the default thread limit, if one was given.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

/// Emit the number of threads for a target directive. Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested thread_limit clause establishes the upper bound used below.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
      }
      // Step into a nested teams (non-distribute) directive to look for a
      // deeper nested directive.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/false);
      // Use min(num_threads, thread_limit) when both are present.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
6992 OMP_MAP_RETURN_PARAM = 0x40, 6993 /// This flag signals that the reference being passed is a pointer to 6994 /// private data. 6995 OMP_MAP_PRIVATE = 0x80, 6996 /// Pass the element to the device by value. 6997 OMP_MAP_LITERAL = 0x100, 6998 /// Implicit map 6999 OMP_MAP_IMPLICIT = 0x200, 7000 /// The 16 MSBs of the flags indicate whether the entry is member of some 7001 /// struct/class. 7002 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7003 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7004 }; 7005 7006 /// Class that associates information with a base pointer to be passed to the 7007 /// runtime library. 7008 class BasePointerInfo { 7009 /// The base pointer. 7010 llvm::Value *Ptr = nullptr; 7011 /// The base declaration that refers to this device pointer, or null if 7012 /// there is none. 7013 const ValueDecl *DevPtrDecl = nullptr; 7014 7015 public: 7016 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7017 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7018 llvm::Value *operator*() const { return Ptr; } 7019 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7020 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7021 }; 7022 7023 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7024 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7025 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7026 7027 /// Map between a struct and the its lowest & highest elements which have been 7028 /// mapped. 7029 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7030 /// HE(FieldIndex, Pointer)} 7031 struct StructRangeInfoTy { 7032 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7033 0, Address::invalid()}; 7034 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7035 0, Address::invalid()}; 7036 Address Base = Address::invalid(); 7037 }; 7038 7039 private: 7040 /// Kind that defines how a device pointer has to be returned. 
7041 struct MapInfo { 7042 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7043 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7044 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7045 bool ReturnDevicePointer = false; 7046 bool IsImplicit = false; 7047 7048 MapInfo() = default; 7049 MapInfo( 7050 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7051 OpenMPMapClauseKind MapType, 7052 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7053 bool ReturnDevicePointer, bool IsImplicit) 7054 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7055 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} 7056 }; 7057 7058 /// If use_device_ptr is used on a pointer which is a struct member and there 7059 /// is no map information about it, then emission of that entry is deferred 7060 /// until the whole struct has been processed. 7061 struct DeferredDevicePtrEntryTy { 7062 const Expr *IE = nullptr; 7063 const ValueDecl *VD = nullptr; 7064 7065 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD) 7066 : IE(IE), VD(VD) {} 7067 }; 7068 7069 /// Directive from where the map clauses were extracted. 7070 const OMPExecutableDirective &CurDir; 7071 7072 /// Function the directive is being generated for. 7073 CodeGenFunction &CGF; 7074 7075 /// Set of all first private variables in the current directive. 7076 llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls; 7077 7078 /// Map between device pointer declarations and their expression components. 7079 /// The key value for declarations in 'this' is null. 7080 llvm::DenseMap< 7081 const ValueDecl *, 7082 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7083 DevPointersMap; 7084 7085 llvm::Value *getExprTypeSize(const Expr *E) const { 7086 QualType ExprTy = E->getType().getCanonicalType(); 7087 7088 // Reference types are ignored for mapping purposes. 
7089 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7090 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7091 7092 // Given that an array section is considered a built-in type, we need to 7093 // do the calculation based on the length of the section instead of relying 7094 // on CGF.getTypeSize(E->getType()). 7095 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7096 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7097 OAE->getBase()->IgnoreParenImpCasts()) 7098 .getCanonicalType(); 7099 7100 // If there is no length associated with the expression, that means we 7101 // are using the whole length of the base. 7102 if (!OAE->getLength() && OAE->getColonLoc().isValid()) 7103 return CGF.getTypeSize(BaseTy); 7104 7105 llvm::Value *ElemSize; 7106 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7107 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7108 } else { 7109 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7110 assert(ATy && "Expecting array type if not a pointer type."); 7111 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7112 } 7113 7114 // If we don't have a length at this point, that is because we have an 7115 // array section with a single element. 7116 if (!OAE->getLength()) 7117 return ElemSize; 7118 7119 llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); 7120 LengthVal = 7121 CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); 7122 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7123 } 7124 return CGF.getTypeSize(ExprTy); 7125 } 7126 7127 /// Return the corresponding bits for a given map clause modifier. Add 7128 /// a flag marking the map as a pointer if requested. Add a flag marking the 7129 /// map as the first one of a series of maps that relate to the same map 7130 /// expression. 
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release is the default behavior in the runtime library, i.e.
      // if we don't pass any bits alloc/release that is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    // The 'always' modifier forces the transfer even if the data is already
    // present on the device.
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
        != MapModifiers.end())
      Bits |= OMP_MAP_ALWAYS;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section, is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refer to a single element.
    if (OASE->getColonLoc().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happen to be unity size, that's user fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have more that size 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    bool IsLink = false; // Is this variable a "declare target link"?

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      // Array access / section rooted at 'this'.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
          if (*Res == OMPDeclareTargetDeclAttr::MT_Link) {
            // "declare target link" variables are mapped through the link
            // pointer emitted by the runtime rather than the variable itself.
            IsLink = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
          }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1)   (2)  (3)  (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      if (Next == CE || IsPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB =
            CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointer =
            IsPointer && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress();
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(Size);
            Types.push_back(Flags);
            // Skip past the overlapped element; the next chunk starts just
            // after it.
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          // Emit the trailing chunk: from the last overlapped element to the
          // end of the struct.
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(Size);
          Types.push_back(Flags);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointer) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(Size);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, IsImplicit,
              !IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            PartialStruct.HighestElem = {FieldIndex, LB};
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      // const-qualified by-ref captures only need to be copied to the device.
      if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
          Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
        return MappableExprsHandler::OMP_MAP_ALWAYS |
               MappableExprsHandler::OMP_MAP_TO;
      // First-private pointers map the pointee together with the pointer.
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    // Default mapping for captures that are not first-private: copy in and out.
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  /// Build the MEMBER_OF flag encoding \a Position (0-based index of the
  /// parent entry in the argument list; the encoded value is 1-based).
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Member of is given by the 16 MSB of the flag, so rotate by 48 bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << 48);
  }

  /// Replace the MEMBER_OF placeholder in \a Flags with \a MemberOfFlag,
  /// leaving non-placeholder PTR_AND_OBJ entries untouched.
  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of the
    // proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

  /// Flatten the layout of record \a RD (bases first, then fields) into
  /// \a Layout, recursing into non-virtual and virtual bases. \a AsBase
  /// selects the base-subobject LLVM type instead of the complete type.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    // Slot per LLVM struct element; each holds either a base class or a field.
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // Keep the first occupant if a non-virtual base already claimed the slot.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField()) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Emit the flattened layout in slot order, recursing into bases.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.insert(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[L.first].push_back(L.second);
  }

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
7758 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 7759 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7760 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 7761 const StructRangeInfoTy &PartialStruct) const { 7762 // Base is the base of the struct 7763 BasePointers.push_back(PartialStruct.Base.getPointer()); 7764 // Pointer is the address of the lowest element 7765 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 7766 Pointers.push_back(LB); 7767 // Size is (addr of {highest+1} element) - (addr of lowest element) 7768 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 7769 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 7770 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 7771 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 7772 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 7773 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.SizeTy, 7774 /*isSinged=*/false); 7775 Sizes.push_back(Size); 7776 // Map type is always TARGET_PARAM 7777 Types.push_back(OMP_MAP_TARGET_PARAM); 7778 // Remove TARGET_PARAM flag from the first element 7779 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 7780 7781 // All other current entries will be MEMBER_OF the combined entry 7782 // (except for PTR_AND_OBJ entries which do not have a placeholder value 7783 // 0xFFFF in the MEMBER_OF field). 7784 OpenMPOffloadMappingFlags MemberOfFlag = 7785 getMemberOfFlag(BasePointers.size() - 1); 7786 for (auto &M : CurTypes) 7787 setCorrectMemberOfFlag(M, MemberOfFlag); 7788 } 7789 7790 /// Generate all the base pointers, section pointers, sizes and map 7791 /// types for the extracted mappable expressions. Also, for each item that 7792 /// relates with a device pointer, a pair of the relevant declaration and 7793 /// index where it occurs is appended to the device pointers info array. 
7794 void generateAllInfo(MapBaseValuesArrayTy &BasePointers, 7795 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7796 MapFlagsArrayTy &Types) const { 7797 // We have to process the component lists that relate with the same 7798 // declaration in a single chunk so that we can generate the map flags 7799 // correctly. Therefore, we organize all lists in a map. 7800 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 7801 7802 // Helper function to fill the information map for the different supported 7803 // clauses. 7804 auto &&InfoGen = [&Info]( 7805 const ValueDecl *D, 7806 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 7807 OpenMPMapClauseKind MapType, 7808 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7809 bool ReturnDevicePointer, bool IsImplicit) { 7810 const ValueDecl *VD = 7811 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 7812 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 7813 IsImplicit); 7814 }; 7815 7816 // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 7817 for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) 7818 for (const auto &L : C->component_lists()) { 7819 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), 7820 /*ReturnDevicePointer=*/false, C->isImplicit()); 7821 } 7822 for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>()) 7823 for (const auto &L : C->component_lists()) { 7824 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, 7825 /*ReturnDevicePointer=*/false, C->isImplicit()); 7826 } 7827 for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>()) 7828 for (const auto &L : C->component_lists()) { 7829 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, 7830 /*ReturnDevicePointer=*/false, C->isImplicit()); 7831 } 7832 7833 // Look at the use_device_ptr clause information and mark the existing map 7834 // entries as such. 
If there is no map information for an entry in the 7835 // use_device_ptr list, we create one with map type 'alloc' and zero size 7836 // section. It is the user fault if that was not mapped before. If there is 7837 // no map information and the pointer is a struct member, then we defer the 7838 // emission of that entry until the whole struct has been processed. 7839 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> 7840 DeferredInfo; 7841 7842 // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 7843 for (const auto *C : 7844 this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) { 7845 for (const auto &L : C->component_lists()) { 7846 assert(!L.second.empty() && "Not expecting empty list of components!"); 7847 const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); 7848 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 7849 const Expr *IE = L.second.back().getAssociatedExpression(); 7850 // If the first component is a member expression, we have to look into 7851 // 'this', which maps to null in the map of map information. Otherwise 7852 // look directly for the information. 7853 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 7854 7855 // We potentially have map information for this declaration already. 7856 // Look for the first set of components that refer to it. 7857 if (It != Info.end()) { 7858 auto CI = std::find_if( 7859 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { 7860 return MI.Components.back().getAssociatedDeclaration() == VD; 7861 }); 7862 // If we found a map entry, signal that the pointer has to be returned 7863 // and move on to the next declaration. 7864 if (CI != It->second.end()) { 7865 CI->ReturnDevicePointer = true; 7866 continue; 7867 } 7868 } 7869 7870 // We didn't find any match in our map information - generate a zero 7871 // size array section - if the pointer is a struct member we defer this 7872 // action until the whole struct has been processed. 
7873 // FIXME: MSVC 2013 seems to require this-> to find member CGF. 7874 if (isa<MemberExpr>(IE)) { 7875 // Insert the pointer into Info to be processed by 7876 // generateInfoForComponentList. Because it is a member pointer 7877 // without a pointee, no entry will be generated for it, therefore 7878 // we need to generate one after the whole struct has been processed. 7879 // Nonetheless, generateInfoForComponentList must be called to take 7880 // the pointer into account for the calculation of the range of the 7881 // partial struct. 7882 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, 7883 /*ReturnDevicePointer=*/false, C->isImplicit()); 7884 DeferredInfo[nullptr].emplace_back(IE, VD); 7885 } else { 7886 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( 7887 this->CGF.EmitLValue(IE), IE->getExprLoc()); 7888 BasePointers.emplace_back(Ptr, VD); 7889 Pointers.push_back(Ptr); 7890 Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy)); 7891 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); 7892 } 7893 } 7894 } 7895 7896 for (const auto &M : Info) { 7897 // We need to know when we generate information for the first component 7898 // associated with a capture, because the mapping flags depend on it. 7899 bool IsFirstComponentList = true; 7900 7901 // Temporary versions of arrays 7902 MapBaseValuesArrayTy CurBasePointers; 7903 MapValuesArrayTy CurPointers; 7904 MapValuesArrayTy CurSizes; 7905 MapFlagsArrayTy CurTypes; 7906 StructRangeInfoTy PartialStruct; 7907 7908 for (const MapInfo &L : M.second) { 7909 assert(!L.Components.empty() && 7910 "Not expecting declaration with no component lists."); 7911 7912 // Remember the current base pointer index. 7913 unsigned CurrentBasePointersIdx = CurBasePointers.size(); 7914 // FIXME: MSVC 2013 seems to require this-> to find the member method. 
7915 this->generateInfoForComponentList( 7916 L.MapType, L.MapModifiers, L.Components, CurBasePointers, 7917 CurPointers, CurSizes, CurTypes, PartialStruct, 7918 IsFirstComponentList, L.IsImplicit); 7919 7920 // If this entry relates with a device pointer, set the relevant 7921 // declaration and add the 'return pointer' flag. 7922 if (L.ReturnDevicePointer) { 7923 assert(CurBasePointers.size() > CurrentBasePointersIdx && 7924 "Unexpected number of mapped base pointers."); 7925 7926 const ValueDecl *RelevantVD = 7927 L.Components.back().getAssociatedDeclaration(); 7928 assert(RelevantVD && 7929 "No relevant declaration related with device pointer??"); 7930 7931 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); 7932 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 7933 } 7934 IsFirstComponentList = false; 7935 } 7936 7937 // Append any pending zero-length pointers which are struct members and 7938 // used with use_device_ptr. 7939 auto CI = DeferredInfo.find(M.first); 7940 if (CI != DeferredInfo.end()) { 7941 for (const DeferredDevicePtrEntryTy &L : CI->second) { 7942 llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(); 7943 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( 7944 this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); 7945 CurBasePointers.emplace_back(BasePtr, L.VD); 7946 CurPointers.push_back(Ptr); 7947 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy)); 7948 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder 7949 // value MEMBER_OF=FFFF so that the entry is later updated with the 7950 // correct value of MEMBER_OF. 7951 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 7952 OMP_MAP_MEMBER_OF); 7953 } 7954 } 7955 7956 // If there is an entry in PartialStruct it means we have a struct with 7957 // individual members mapped. Emit an extra combined entry. 
      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped: emit one extra combined entry covering the
      // whole struct range.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }

  /// Emit capture info for lambdas for variables captured by reference.
  /// For the lambda's 'this' capture and every by-reference capture, record a
  /// PTR_AND_OBJ entry (field address -> captured address) and remember the
  /// lambda base address in \p LambdaPointers so the MEMBER_OF index can be
  /// fixed up later by adjustMemberOfForLambdaCaptures.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    // Only lambda captures need this treatment.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(ThisLVal.getPointer());
      Pointers.push_back(ThisLValVal.getPointer());
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      // By-copy captures live inside the lambda object and need no entry.
      if (LC.getCaptureKind() != LCK_ByRef)
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
      LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(VarLVal.getPointer());
      Pointers.push_back(VarLValVal.getPointer());
      Sizes.push_back(CGF.getTypeSize(
          VD->getType().getCanonicalType().getNonReferenceType()));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }

  /// Set correct indices for lambdas captures.
  /// Rewrites the MEMBER_OF bits of every implicit lambda-capture entry so
  /// they reference the enclosing lambda's own map entry.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      // Entries produced by generateInfoForLambdaCaptures carry exactly this
      // flag combination; anything else is left untouched.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      // Scan backwards for the entry that maps the lambda object itself.
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // The 'this' capture has no associated declaration; it is keyed by
    // nullptr in the clause component lists.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    // Collect every map-clause component list that refers to this capture.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      // Compare this list against every later list; component lists are
      // walked back-to-front so a prefix match means one expression is a
      // sub-object of the other.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The shorter (exhausted) list denotes the base object; the other
          // is the overlapped sub-object.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      // Order overlapped component lists by declaration order of the fields
      // they reference, using the record layout computed above.
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }

  /// Generate the base pointers, section pointers, sizes and map types
  /// associated with the declare target link variables.
  void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
                                        MapValuesArrayTy &Pointers,
                                        MapValuesArrayTy &Sizes,
                                        MapFlagsArrayTy &Types) const {
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->component_lists()) {
        if (!L.first)
          continue;
        const auto *VD = dyn_cast<VarDecl>(L.first);
        if (!VD)
          continue;
        llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
            OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
        // Only 'link' variables are handled here; 'to' variables are emitted
        // as globals elsewhere.
        if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
          continue;
        StructRangeInfoTy PartialStruct;
        generateInfoForComponentList(
            C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
            Pointers, Sizes, Types, PartialStruct,
            /*IsFirstComponentList=*/true, C->isImplicit());
        assert(!PartialStruct.Base.isValid() &&
               "No partial structs for declare target link expected.");
      }
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    // Do the default mapping.
    if (CI.capturesThis()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.getTypeSize(RI.getType()));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
      }
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.getTypeSize(ElementType));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      if (FirstPrivateDecls.count(VD) &&
          VD->getType().isConstant(CGF.getContext())) {
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*isVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (FirstPrivateDecls.count(VD) && ElementType->isAnyPointerType()) {
          // Firstprivate pointers are dereferenced so the pointee, not the
          // reference slot, is what gets mapped.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType =
        Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0);

    // Base pointers and section pointers always vary at run time, so they
    // are emitted as stack temporaries filled below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Store each base pointer, pointer and (if runtime-evaluated) size into
    // its slot of the corresponding offload array.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where a use_device_ptr declaration's device address lives so
      // the body of the construct can load it.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.SizeTy, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of pointers, sizes and map types.
8447 static void emitOffloadingArraysArgument( 8448 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 8449 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 8450 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 8451 CodeGenModule &CGM = CGF.CGM; 8452 if (Info.NumberOfPtrs) { 8453 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8454 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8455 Info.BasePointersArray, 8456 /*Idx0=*/0, /*Idx1=*/0); 8457 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8458 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8459 Info.PointersArray, 8460 /*Idx0=*/0, 8461 /*Idx1=*/0); 8462 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8463 llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray, 8464 /*Idx0=*/0, /*Idx1=*/0); 8465 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8466 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8467 Info.MapTypesArray, 8468 /*Idx0=*/0, 8469 /*Idx1=*/0); 8470 } else { 8471 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8472 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8473 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); 8474 MapTypesArrayArg = 8475 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8476 } 8477 } 8478 8479 /// Check for inner distribute directive. 
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may directly contain a distribute directive, or a 'teams'
      // that in turn contains one; look one more level down in that case.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    // All remaining kinds either already are distribute directives or can
    // never enclose one here; reaching this switch with them is a bug.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}

void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
    const llvm::function_ref<llvm::Value *(
        CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  // No distribute loop: there is no trip count to push to the runtime.
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    llvm::Value *NumIterations = SizeEmitter(CGF, *LD);

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);

    llvm::Value *Args[] = {DeviceID, NumIterations};
    CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Function *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask,
                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything.
It could be the pointer to the outlined function that 8636 // implements the target region, but we aren't using that so that the 8637 // compiler doesn't need to keep that, and could therefore inline the host 8638 // function if proven worthwhile during optimization. 8639 8640 // From this point on, we need to have an ID of the target region defined. 8641 assert(OutlinedFnID && "Invalid outlined function ID!"); 8642 8643 // Emit device ID if any. 8644 llvm::Value *DeviceID; 8645 if (Device) { 8646 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 8647 CGF.Int64Ty, /*isSigned=*/true); 8648 } else { 8649 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 8650 } 8651 8652 // Emit the number of elements in the offloading arrays. 8653 llvm::Value *PointerNum = 8654 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 8655 8656 // Return value of the runtime offloading call. 8657 llvm::Value *Return; 8658 8659 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 8660 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 8661 8662 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 8663 // The target region is an outlined function launched by the runtime 8664 // via calls __tgt_target() or __tgt_target_teams(). 8665 // 8666 // __tgt_target() launches a target region with one team and one thread, 8667 // executing a serial region. This master thread may in turn launch 8668 // more threads within its team upon encountering a parallel region, 8669 // however, no additional teams can be launched on the device. 8670 // 8671 // __tgt_target_teams() launches a target region with one or more teams, 8672 // each with one or more threads. This call is required for target 8673 // constructs such as: 8674 // 'target teams' 8675 // 'target' / 'teams' 8676 // 'target teams distribute parallel for' 8677 // 'target parallel' 8678 // and so on. 
8679 // 8680 // Note that on the host and CPU targets, the runtime implementation of 8681 // these calls simply call the outlined function without forking threads. 8682 // The outlined functions themselves have runtime calls to 8683 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 8684 // the compiler in emitTeamsCall() and emitParallelCall(). 8685 // 8686 // In contrast, on the NVPTX target, the implementation of 8687 // __tgt_target_teams() launches a GPU kernel with the requested number 8688 // of teams and threads so no additional calls to the runtime are required. 8689 if (NumTeams) { 8690 // If we have NumTeams defined this means that we have an enclosed teams 8691 // region. Therefore we also expect to have NumThreads defined. These two 8692 // values should be defined in the presence of a teams directive, 8693 // regardless of having any clauses associated. If the user is using teams 8694 // but no clauses, these two values will be the default that should be 8695 // passed to the runtime library - a 32-bit integer with the value zero. 8696 assert(NumThreads && "Thread limit expression should be available along " 8697 "with number of teams."); 8698 llvm::Value *OffloadingArgs[] = {DeviceID, 8699 OutlinedFnID, 8700 PointerNum, 8701 InputInfo.BasePointersArray.getPointer(), 8702 InputInfo.PointersArray.getPointer(), 8703 InputInfo.SizesArray.getPointer(), 8704 MapTypesArray, 8705 NumTeams, 8706 NumThreads}; 8707 Return = CGF.EmitRuntimeCall( 8708 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait 8709 : OMPRTL__tgt_target_teams), 8710 OffloadingArgs); 8711 } else { 8712 llvm::Value *OffloadingArgs[] = {DeviceID, 8713 OutlinedFnID, 8714 PointerNum, 8715 InputInfo.BasePointersArray.getPointer(), 8716 InputInfo.PointersArray.getPointer(), 8717 InputInfo.SizesArray.getPointer(), 8718 MapTypesArray}; 8719 Return = CGF.EmitRuntimeCall( 8720 createRuntimeFunction(HasNowait ? 
OMPRTL__tgt_target_nowait 8721 : OMPRTL__tgt_target), 8722 OffloadingArgs); 8723 } 8724 8725 // Check the error code and execute the host version if required. 8726 llvm::BasicBlock *OffloadFailedBlock = 8727 CGF.createBasicBlock("omp_offload.failed"); 8728 llvm::BasicBlock *OffloadContBlock = 8729 CGF.createBasicBlock("omp_offload.cont"); 8730 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 8731 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 8732 8733 CGF.EmitBlock(OffloadFailedBlock); 8734 if (RequiresOuterTask) { 8735 CapturedVars.clear(); 8736 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 8737 } 8738 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 8739 CGF.EmitBranch(OffloadContBlock); 8740 8741 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 8742 }; 8743 8744 // Notify that the host version must be executed. 8745 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 8746 RequiresOuterTask](CodeGenFunction &CGF, 8747 PrePostActionTy &) { 8748 if (RequiresOuterTask) { 8749 CapturedVars.clear(); 8750 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 8751 } 8752 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 8753 }; 8754 8755 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 8756 &CapturedVars, RequiresOuterTask, 8757 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 8758 // Fill up the arrays with all the captured variables. 8759 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 8760 MappableExprsHandler::MapValuesArrayTy Pointers; 8761 MappableExprsHandler::MapValuesArrayTy Sizes; 8762 MappableExprsHandler::MapFlagsArrayTy MapTypes; 8763 8764 // Get mappable expression information. 
8765 MappableExprsHandler MEHandler(D, CGF); 8766 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 8767 8768 auto RI = CS.getCapturedRecordDecl()->field_begin(); 8769 auto CV = CapturedVars.begin(); 8770 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 8771 CE = CS.capture_end(); 8772 CI != CE; ++CI, ++RI, ++CV) { 8773 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 8774 MappableExprsHandler::MapValuesArrayTy CurPointers; 8775 MappableExprsHandler::MapValuesArrayTy CurSizes; 8776 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 8777 MappableExprsHandler::StructRangeInfoTy PartialStruct; 8778 8779 // VLA sizes are passed to the outlined region by copy and do not have map 8780 // information associated. 8781 if (CI->capturesVariableArrayType()) { 8782 CurBasePointers.push_back(*CV); 8783 CurPointers.push_back(*CV); 8784 CurSizes.push_back(CGF.getTypeSize(RI->getType())); 8785 // Copy to the device as an argument. No need to retrieve it. 8786 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 8787 MappableExprsHandler::OMP_MAP_TARGET_PARAM); 8788 } else { 8789 // If we have any information in the map clause, we use it, otherwise we 8790 // just do a default mapping. 8791 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, 8792 CurSizes, CurMapTypes, PartialStruct); 8793 if (CurBasePointers.empty()) 8794 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, 8795 CurPointers, CurSizes, CurMapTypes); 8796 // Generate correct mapping for variables captured by reference in 8797 // lambdas. 8798 if (CI->capturesVariable()) 8799 MEHandler.generateInfoForLambdaCaptures( 8800 CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes, 8801 CurMapTypes, LambdaPointers); 8802 } 8803 // We expect to have at least an element of information for this capture. 
8804 assert(!CurBasePointers.empty() && 8805 "Non-existing map pointer for capture!"); 8806 assert(CurBasePointers.size() == CurPointers.size() && 8807 CurBasePointers.size() == CurSizes.size() && 8808 CurBasePointers.size() == CurMapTypes.size() && 8809 "Inconsistent map information sizes!"); 8810 8811 // If there is an entry in PartialStruct it means we have a struct with 8812 // individual members mapped. Emit an extra combined entry. 8813 if (PartialStruct.Base.isValid()) 8814 MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes, 8815 CurMapTypes, PartialStruct); 8816 8817 // We need to append the results of this capture to what we already have. 8818 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8819 Pointers.append(CurPointers.begin(), CurPointers.end()); 8820 Sizes.append(CurSizes.begin(), CurSizes.end()); 8821 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); 8822 } 8823 // Adjust MEMBER_OF flags for the lambdas captures. 8824 MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers, 8825 Pointers, MapTypes); 8826 // Map other list items in the map clause which are not captured variables 8827 // but "declare target link" global variables. 8828 MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, 8829 MapTypes); 8830 8831 TargetDataInfo Info; 8832 // Fill up the arrays and create the arguments. 
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the generated arrays through InputInfo/MapTypesArray (captured
    // by reference) so the enclosed ThenGen can pass them to the runtime call.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

/// Recursively scan \p S for OpenMP target execution directives and emit the
/// corresponding device functions. \p ParentName is part of the unique
/// identifier of each discovered target region (together with device ID, file
/// ID and line number).
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // None of the remaining directive kinds is a target execution directive;
    // reaching one of them here violates the RequiresDeviceCodegen check
    // above. The exhaustive list keeps the switch -Wswitch-clean.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  StringRef Name = CGM.getMangledName(GD);
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD))
    scanForTargetRegionsFunctions(FD->getBody(), Name);

  // Do not to emit function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetFunctions.count(Name) == 0;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not to emit variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) {
    // Defer emission; emitDeferredTargetDecls() revisits these later.
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

/// Create (or reuse) an internal device global holding the value of the
/// constant variable \p VD so it can be used as a firstprivate copy in target
/// regions, and register it as an offload entry.
llvm::Constant *
CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
                                                const VarDecl *VD) {
  assert(VD->getType().isConstant(CGM.getContext()) &&
         "Expected constant variable.");
  StringRef VarName;
  llvm::Constant *Addr;
  llvm::GlobalValue::LinkageTypes Linkage;
  QualType Ty = VD->getType();
  SmallString<128> Buffer;
  {
    // Build a unique name for the copy from the unique position (device ID,
    // file ID, line) of the variable declaration.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
                             FileID, Line);
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
    VarName = OS.str();
  }
  Linkage = llvm::GlobalValue::InternalLinkage;
  Addr =
      getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
                                  getDefaultFirstprivateAddressSpace());
  cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
  CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
  CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize,
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
  return Addr;
}

void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;
  switch (*Res) {
  case OMPDeclareTargetDeclAttr::MT_To:
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration only - size is unknown here.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Emit an internal, constant "<name>_ref" global initialized with the
      // variable's address and mark it compiler-used so the variable survives.
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
    break;
  case OMPDeclareTargetDeclAttr::MT_Link:
    // 'declare target link' entries are registered through a pointer-sized
    // proxy; on the device only the name is recorded.
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetLink(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
    break;
  }
  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  // Emit the declare target variables whose emission was deferred by
  // emitTargetGlobalVariable().
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To) {
      CGM.EmitGlobal(VD);
    } else {
      assert(*Res == OMPDeclareTargetDeclAttr::MT_Link &&
             "Expected to or link clauses.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
    }
  }
}

void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  // Default implementation is a no-op; target-specific runtimes may override.
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}

bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch(A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  // Save the flag and suppress implicit declare-target marking for the
  // lifetime of this object (device compilation only).
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  StringRef Name = CGM.getMangledName(GD);
  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not to emit function if it is marked as declare target as it was already
  // emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // Returns false only for the first time Name is inserted.
  return !AlreadyEmittedTargetFunctions.insert(Name).second;
}

llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
  // If we have offloading in the current module, we need to emit the entries
  // now and register the offloading descriptor.
  createOffloadEntriesAndInfoMetadata();

  // Create and register the offloading binary descriptors. This is the main
  // entity that captures all the information about offloading in the current
  // compilation unit.
  return createOffloadingBinaryDescriptorRegistration();
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // A missing clause expression is passed to the runtime as a 32-bit zero.
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    // Any other directive kind is rejected by the assert at the top of the
    // enclosing function; the exhaustive list keeps the switch -Wswitch-clean.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the arrays through InputInfo/MapTypesArray (captured by
    // reference) so ThenGen can pass them to the runtime call.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, TargetThenGen,
                    [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
};
} // namespace

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
  // of that clause. The VLEN value must be power of 2.
  // In other case the notion of the function`s "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //      CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //      type which is pass-by-value (except for the type that maps to the
  //      built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of vector
  // register of that ISA for which current vector version is generated. The
  // VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where vector register size specified in section 3.2.1 Registers and the
  // Stack Frame of original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      // The implicit 'this' parameter occupies the first ParamAttrs slot for
      // member functions.
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      // Case b): the first parameter classified as Vector.
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  // Case c)/d): aggregates fall back to int.
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
ISADataTy ISAData[] = { 9662 { 9663 'b', 128 9664 }, // SSE 9665 { 9666 'c', 256 9667 }, // AVX 9668 { 9669 'd', 256 9670 }, // AVX2 9671 { 9672 'e', 512 9673 }, // AVX512 9674 }; 9675 llvm::SmallVector<char, 2> Masked; 9676 switch (State) { 9677 case OMPDeclareSimdDeclAttr::BS_Undefined: 9678 Masked.push_back('N'); 9679 Masked.push_back('M'); 9680 break; 9681 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 9682 Masked.push_back('N'); 9683 break; 9684 case OMPDeclareSimdDeclAttr::BS_Inbranch: 9685 Masked.push_back('M'); 9686 break; 9687 } 9688 for (char Mask : Masked) { 9689 for (const ISADataTy &Data : ISAData) { 9690 SmallString<256> Buffer; 9691 llvm::raw_svector_ostream Out(Buffer); 9692 Out << "_ZGV" << Data.ISA << Mask; 9693 if (!VLENVal) { 9694 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / 9695 evaluateCDTSize(FD, ParamAttrs)); 9696 } else { 9697 Out << VLENVal; 9698 } 9699 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 9700 switch (ParamAttr.Kind){ 9701 case LinearWithVarStride: 9702 Out << 's' << ParamAttr.StrideOrArg; 9703 break; 9704 case Linear: 9705 Out << 'l'; 9706 if (!!ParamAttr.StrideOrArg) 9707 Out << ParamAttr.StrideOrArg; 9708 break; 9709 case Uniform: 9710 Out << 'u'; 9711 break; 9712 case Vector: 9713 Out << 'v'; 9714 break; 9715 } 9716 if (!!ParamAttr.Alignment) 9717 Out << 'a' << ParamAttr.Alignment; 9718 } 9719 Out << '_' << Fn->getName(); 9720 Fn->addFnAttr(Out.str()); 9721 } 9722 } 9723 } 9724 9725 // This are the Functions that are needed to mangle the name of the 9726 // vector functions generated by the compiler, according to the rules 9727 // defined in the "Vector Function ABI specifications for AArch64", 9728 // available at 9729 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 9730 9731 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 
9732 /// 9733 /// TODO: Need to implement the behavior for reference marked with a 9734 /// var or no linear modifiers (1.b in the section). For this, we 9735 /// need to extend ParamKindTy to support the linear modifiers. 9736 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 9737 QT = QT.getCanonicalType(); 9738 9739 if (QT->isVoidType()) 9740 return false; 9741 9742 if (Kind == ParamKindTy::Uniform) 9743 return false; 9744 9745 if (Kind == ParamKindTy::Linear) 9746 return false; 9747 9748 // TODO: Handle linear references with modifiers 9749 9750 if (Kind == ParamKindTy::LinearWithVarStride) 9751 return false; 9752 9753 return true; 9754 } 9755 9756 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 9757 static bool getAArch64PBV(QualType QT, ASTContext &C) { 9758 QT = QT.getCanonicalType(); 9759 unsigned Size = C.getTypeSize(QT); 9760 9761 // Only scalars and complex within 16 bytes wide set PVB to true. 9762 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 9763 return false; 9764 9765 if (QT->isFloatingType()) 9766 return true; 9767 9768 if (QT->isIntegerType()) 9769 return true; 9770 9771 if (QT->isPointerType()) 9772 return true; 9773 9774 // TODO: Add support for complex types (section 3.1.2, item 2). 9775 9776 return false; 9777 } 9778 9779 /// Computes the lane size (LS) of a return type or of an input parameter, 9780 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 9781 /// TODO: Add support for references, section 3.2.1, item 1. 
9782 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 9783 if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 9784 QualType PTy = QT.getCanonicalType()->getPointeeType(); 9785 if (getAArch64PBV(PTy, C)) 9786 return C.getTypeSize(PTy); 9787 } 9788 if (getAArch64PBV(QT, C)) 9789 return C.getTypeSize(QT); 9790 9791 return C.getTypeSize(C.getUIntPtrType()); 9792 } 9793 9794 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 9795 // signature of the scalar function, as defined in 3.2.2 of the 9796 // AAVFABI. 9797 static std::tuple<unsigned, unsigned, bool> 9798 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 9799 QualType RetType = FD->getReturnType().getCanonicalType(); 9800 9801 ASTContext &C = FD->getASTContext(); 9802 9803 bool OutputBecomesInput = false; 9804 9805 llvm::SmallVector<unsigned, 8> Sizes; 9806 if (!RetType->isVoidType()) { 9807 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 9808 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 9809 OutputBecomesInput = true; 9810 } 9811 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 9812 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 9813 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 9814 } 9815 9816 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 9817 // The LS of a function parameter / return value can only be a power 9818 // of 2, starting from 8 bits, up to 128. 
9819 assert(std::all_of(Sizes.begin(), Sizes.end(), 9820 [](unsigned Size) { 9821 return Size == 8 || Size == 16 || Size == 32 || 9822 Size == 64 || Size == 128; 9823 }) && 9824 "Invalid size"); 9825 9826 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 9827 *std::max_element(std::begin(Sizes), std::end(Sizes)), 9828 OutputBecomesInput); 9829 } 9830 9831 /// Mangle the parameter part of the vector function name according to 9832 /// their OpenMP classification. The mangling function is defined in 9833 /// section 3.5 of the AAVFABI. 9834 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 9835 SmallString<256> Buffer; 9836 llvm::raw_svector_ostream Out(Buffer); 9837 for (const auto &ParamAttr : ParamAttrs) { 9838 switch (ParamAttr.Kind) { 9839 case LinearWithVarStride: 9840 Out << "ls" << ParamAttr.StrideOrArg; 9841 break; 9842 case Linear: 9843 Out << 'l'; 9844 // Don't print the step value if it is not present or if it is 9845 // equal to 1. 9846 if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1) 9847 Out << ParamAttr.StrideOrArg; 9848 break; 9849 case Uniform: 9850 Out << 'u'; 9851 break; 9852 case Vector: 9853 Out << 'v'; 9854 break; 9855 } 9856 9857 if (!!ParamAttr.Alignment) 9858 Out << 'a' << ParamAttr.Alignment; 9859 } 9860 9861 return Out.str(); 9862 } 9863 9864 // Function used to add the attribute. The parameter `VLEN` is 9865 // templated to allow the use of "x" when targeting scalable functions 9866 // for SVE. 
9867 template <typename T> 9868 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 9869 char ISA, StringRef ParSeq, 9870 StringRef MangledName, bool OutputBecomesInput, 9871 llvm::Function *Fn) { 9872 SmallString<256> Buffer; 9873 llvm::raw_svector_ostream Out(Buffer); 9874 Out << Prefix << ISA << LMask << VLEN; 9875 if (OutputBecomesInput) 9876 Out << "v"; 9877 Out << ParSeq << "_" << MangledName; 9878 Fn->addFnAttr(Out.str()); 9879 } 9880 9881 // Helper function to generate the Advanced SIMD names depending on 9882 // the value of the NDS when simdlen is not present. 9883 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 9884 StringRef Prefix, char ISA, 9885 StringRef ParSeq, StringRef MangledName, 9886 bool OutputBecomesInput, 9887 llvm::Function *Fn) { 9888 switch (NDS) { 9889 case 8: 9890 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 9891 OutputBecomesInput, Fn); 9892 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 9893 OutputBecomesInput, Fn); 9894 break; 9895 case 16: 9896 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 9897 OutputBecomesInput, Fn); 9898 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 9899 OutputBecomesInput, Fn); 9900 break; 9901 case 32: 9902 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 9903 OutputBecomesInput, Fn); 9904 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 9905 OutputBecomesInput, Fn); 9906 break; 9907 case 64: 9908 case 128: 9909 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 9910 OutputBecomesInput, Fn); 9911 break; 9912 default: 9913 llvm_unreachable("Scalar type is too wide."); 9914 } 9915 } 9916 9917 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
9918 static void emitAArch64DeclareSimdFunction( 9919 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 9920 ArrayRef<ParamAttrTy> ParamAttrs, 9921 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 9922 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 9923 9924 // Get basic data for building the vector signature. 9925 const auto Data = getNDSWDS(FD, ParamAttrs); 9926 const unsigned NDS = std::get<0>(Data); 9927 const unsigned WDS = std::get<1>(Data); 9928 const bool OutputBecomesInput = std::get<2>(Data); 9929 9930 // Check the values provided via `simdlen` by the user. 9931 // 1. A `simdlen(1)` doesn't produce vector signatures, 9932 if (UserVLEN == 1) { 9933 unsigned DiagID = CGM.getDiags().getCustomDiagID( 9934 DiagnosticsEngine::Warning, 9935 "The clause simdlen(1) has no effect when targeting aarch64."); 9936 CGM.getDiags().Report(SLoc, DiagID); 9937 return; 9938 } 9939 9940 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 9941 // Advanced SIMD output. 9942 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 9943 unsigned DiagID = CGM.getDiags().getCustomDiagID( 9944 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 9945 "power of 2 when targeting Advanced SIMD."); 9946 CGM.getDiags().Report(SLoc, DiagID); 9947 return; 9948 } 9949 9950 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 9951 // limits. 9952 if (ISA == 's' && UserVLEN != 0) { 9953 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 9954 unsigned DiagID = CGM.getDiags().getCustomDiagID( 9955 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 9956 "lanes in the architectural constraints " 9957 "for SVE (min is 128-bit, max is " 9958 "2048-bit, by steps of 128-bit)"); 9959 CGM.getDiags().Report(SLoc, DiagID) << WDS; 9960 return; 9961 } 9962 } 9963 9964 // Sort out parameter sequence. 
9965 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 9966 StringRef Prefix = "_ZGV"; 9967 // Generate simdlen from user input (if any). 9968 if (UserVLEN) { 9969 if (ISA == 's') { 9970 // SVE generates only a masked function. 9971 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 9972 OutputBecomesInput, Fn); 9973 } else { 9974 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 9975 // Advanced SIMD generates one or two functions, depending on 9976 // the `[not]inbranch` clause. 9977 switch (State) { 9978 case OMPDeclareSimdDeclAttr::BS_Undefined: 9979 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 9980 OutputBecomesInput, Fn); 9981 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 9982 OutputBecomesInput, Fn); 9983 break; 9984 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 9985 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 9986 OutputBecomesInput, Fn); 9987 break; 9988 case OMPDeclareSimdDeclAttr::BS_Inbranch: 9989 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 9990 OutputBecomesInput, Fn); 9991 break; 9992 } 9993 } 9994 } else { 9995 // If no user simdlen is provided, follow the AAVFABI rules for 9996 // generating the vector length. 9997 if (ISA == 's') { 9998 // SVE, section 3.4.1, item 1. 9999 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10000 OutputBecomesInput, Fn); 10001 } else { 10002 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10003 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10004 // two vector names depending on the use of the clause 10005 // `[not]inbranch`. 
10006 switch (State) { 10007 case OMPDeclareSimdDeclAttr::BS_Undefined: 10008 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10009 OutputBecomesInput, Fn); 10010 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10011 OutputBecomesInput, Fn); 10012 break; 10013 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10014 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10015 OutputBecomesInput, Fn); 10016 break; 10017 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10018 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10019 OutputBecomesInput, Fn); 10020 break; 10021 } 10022 } 10023 } 10024 } 10025 10026 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10027 llvm::Function *Fn) { 10028 ASTContext &C = CGM.getContext(); 10029 FD = FD->getMostRecentDecl(); 10030 // Map params to their positions in function decl. 10031 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10032 if (isa<CXXMethodDecl>(FD)) 10033 ParamPositions.try_emplace(FD, 0); 10034 unsigned ParamPos = ParamPositions.size(); 10035 for (const ParmVarDecl *P : FD->parameters()) { 10036 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10037 ++ParamPos; 10038 } 10039 while (FD) { 10040 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10041 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10042 // Mark uniform parameters. 10043 for (const Expr *E : Attr->uniforms()) { 10044 E = E->IgnoreParenImpCasts(); 10045 unsigned Pos; 10046 if (isa<CXXThisExpr>(E)) { 10047 Pos = ParamPositions[FD]; 10048 } else { 10049 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10050 ->getCanonicalDecl(); 10051 Pos = ParamPositions[PVD]; 10052 } 10053 ParamAttrs[Pos].Kind = Uniform; 10054 } 10055 // Get alignment info. 
10056 auto NI = Attr->alignments_begin(); 10057 for (const Expr *E : Attr->aligneds()) { 10058 E = E->IgnoreParenImpCasts(); 10059 unsigned Pos; 10060 QualType ParmTy; 10061 if (isa<CXXThisExpr>(E)) { 10062 Pos = ParamPositions[FD]; 10063 ParmTy = E->getType(); 10064 } else { 10065 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10066 ->getCanonicalDecl(); 10067 Pos = ParamPositions[PVD]; 10068 ParmTy = PVD->getType(); 10069 } 10070 ParamAttrs[Pos].Alignment = 10071 (*NI) 10072 ? (*NI)->EvaluateKnownConstInt(C) 10073 : llvm::APSInt::getUnsigned( 10074 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10075 .getQuantity()); 10076 ++NI; 10077 } 10078 // Mark linear parameters. 10079 auto SI = Attr->steps_begin(); 10080 auto MI = Attr->modifiers_begin(); 10081 for (const Expr *E : Attr->linears()) { 10082 E = E->IgnoreParenImpCasts(); 10083 unsigned Pos; 10084 if (isa<CXXThisExpr>(E)) { 10085 Pos = ParamPositions[FD]; 10086 } else { 10087 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10088 ->getCanonicalDecl(); 10089 Pos = ParamPositions[PVD]; 10090 } 10091 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 10092 ParamAttr.Kind = Linear; 10093 if (*SI) { 10094 Expr::EvalResult Result; 10095 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 10096 if (const auto *DRE = 10097 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 10098 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 10099 ParamAttr.Kind = LinearWithVarStride; 10100 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 10101 ParamPositions[StridePVD->getCanonicalDecl()]); 10102 } 10103 } 10104 } else { 10105 ParamAttr.StrideOrArg = Result.Val.getInt(); 10106 } 10107 } 10108 ++SI; 10109 ++MI; 10110 } 10111 llvm::APSInt VLENVal; 10112 SourceLocation ExprLoc; 10113 const Expr *VLENExpr = Attr->getSimdlen(); 10114 if (VLENExpr) { 10115 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 10116 ExprLoc = VLENExpr->getExprLoc(); 10117 } 10118 
OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 10119 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 10120 CGM.getTriple().getArch() == llvm::Triple::x86_64) { 10121 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 10122 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 10123 unsigned VLEN = VLENVal.getExtValue(); 10124 StringRef MangledName = Fn->getName(); 10125 if (CGM.getTarget().hasFeature("sve")) 10126 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10127 MangledName, 's', 128, Fn, ExprLoc); 10128 if (CGM.getTarget().hasFeature("neon")) 10129 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10130 MangledName, 'n', 128, Fn, ExprLoc); 10131 } 10132 } 10133 FD = FD->getPreviousDecl(); 10134 } 10135 } 10136 10137 namespace { 10138 /// Cleanup action for doacross support. 10139 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 10140 public: 10141 static const int DoacrossFinArgs = 2; 10142 10143 private: 10144 llvm::FunctionCallee RTLFn; 10145 llvm::Value *Args[DoacrossFinArgs]; 10146 10147 public: 10148 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 10149 ArrayRef<llvm::Value *> CallArgs) 10150 : RTLFn(RTLFn) { 10151 assert(CallArgs.size() == DoacrossFinArgs); 10152 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10153 } 10154 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10155 if (!CGF.HaveInsertPoint()) 10156 return; 10157 CGF.EmitRuntimeCall(RTLFn, Args); 10158 } 10159 }; 10160 } // namespace 10161 10162 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 10163 const OMPLoopDirective &D, 10164 ArrayRef<Expr *> NumIterations) { 10165 if (!CGF.HaveInsertPoint()) 10166 return; 10167 10168 ASTContext &C = CGM.getContext(); 10169 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 10170 RecordDecl *RD; 10171 if (KmpDimTy.isNull()) { 10172 // Build struct kmp_dim { // loop bounds info casted to 
kmp_int64 10173 // kmp_int64 lo; // lower 10174 // kmp_int64 up; // upper 10175 // kmp_int64 st; // stride 10176 // }; 10177 RD = C.buildImplicitRecord("kmp_dim"); 10178 RD->startDefinition(); 10179 addFieldToRecordDecl(C, RD, Int64Ty); 10180 addFieldToRecordDecl(C, RD, Int64Ty); 10181 addFieldToRecordDecl(C, RD, Int64Ty); 10182 RD->completeDefinition(); 10183 KmpDimTy = C.getRecordType(RD); 10184 } else { 10185 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 10186 } 10187 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 10188 QualType ArrayTy = 10189 C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0); 10190 10191 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 10192 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 10193 enum { LowerFD = 0, UpperFD, StrideFD }; 10194 // Fill dims with data. 10195 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 10196 LValue DimsLVal = CGF.MakeAddrLValue( 10197 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 10198 // dims.upper = num_iterations; 10199 LValue UpperLVal = CGF.EmitLValueForField( 10200 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 10201 llvm::Value *NumIterVal = 10202 CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]), 10203 D.getNumIterations()->getType(), Int64Ty, 10204 D.getNumIterations()->getExprLoc()); 10205 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 10206 // dims.stride = 1; 10207 LValue StrideLVal = CGF.EmitLValueForField( 10208 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 10209 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 10210 StrideLVal); 10211 } 10212 10213 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 10214 // kmp_int32 num_dims, struct kmp_dim * dims); 10215 llvm::Value *Args[] = { 10216 emitUpdateLocation(CGF, D.getBeginLoc()), 10217 getThreadID(CGF, D.getBeginLoc()), 10218 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 10219 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 10220 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 10221 CGM.VoidPtrTy)}; 10222 10223 llvm::FunctionCallee RTLFn = 10224 createRuntimeFunction(OMPRTL__kmpc_doacross_init); 10225 CGF.EmitRuntimeCall(RTLFn, Args); 10226 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 10227 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 10228 llvm::FunctionCallee FiniRTLFn = 10229 createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 10230 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 10231 llvm::makeArrayRef(FiniArgs)); 10232 } 10233 10234 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 10235 const OMPDependClause *C) { 10236 QualType Int64Ty = 10237 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 10238 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 10239 QualType ArrayTy = CGM.getContext().getConstantArrayType( 10240 Int64Ty, Size, ArrayType::Normal, 0); 10241 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 10242 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 10243 const Expr *CounterVal = C->getLoopData(I); 10244 assert(CounterVal); 10245 llvm::Value *CntVal = CGF.EmitScalarConversion( 10246 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 10247 CounterVal->getExprLoc()); 10248 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 10249 /*Volatile=*/false, Int64Ty); 10250 } 10251 llvm::Value *Args[] = { 10252 emitUpdateLocation(CGF, C->getBeginLoc()), 10253 getThreadID(CGF, C->getBeginLoc()), 10254 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 10255 llvm::FunctionCallee RTLFn; 10256 if (C->getDependencyKind() == OMPC_DEPEND_source) { 10257 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 10258 } else { 10259 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 10260 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 10261 } 
10262 CGF.EmitRuntimeCall(RTLFn, Args); 10263 } 10264 10265 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 10266 llvm::FunctionCallee Callee, 10267 ArrayRef<llvm::Value *> Args) const { 10268 assert(Loc.isValid() && "Outlined function call location must be valid."); 10269 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 10270 10271 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 10272 if (Fn->doesNotThrow()) { 10273 CGF.EmitNounwindRuntimeCall(Fn, Args); 10274 return; 10275 } 10276 } 10277 CGF.EmitRuntimeCall(Callee, Args); 10278 } 10279 10280 void CGOpenMPRuntime::emitOutlinedFunctionCall( 10281 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 10282 ArrayRef<llvm::Value *> Args) const { 10283 emitCall(CGF, Loc, OutlinedFn, Args); 10284 } 10285 10286 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 10287 const VarDecl *NativeParam, 10288 const VarDecl *TargetParam) const { 10289 return CGF.GetAddrOfLocalVar(NativeParam); 10290 } 10291 10292 namespace { 10293 /// Cleanup action for allocate support. 
10294 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 10295 public: 10296 static const int CleanupArgs = 3; 10297 10298 private: 10299 llvm::FunctionCallee RTLFn; 10300 llvm::Value *Args[CleanupArgs]; 10301 10302 public: 10303 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 10304 ArrayRef<llvm::Value *> CallArgs) 10305 : RTLFn(RTLFn) { 10306 assert(CallArgs.size() == CleanupArgs && 10307 "Size of arguments does not match."); 10308 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10309 } 10310 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10311 if (!CGF.HaveInsertPoint()) 10312 return; 10313 CGF.EmitRuntimeCall(RTLFn, Args); 10314 } 10315 }; 10316 } // namespace 10317 10318 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 10319 const VarDecl *VD) { 10320 if (!VD) 10321 return Address::invalid(); 10322 const VarDecl *CVD = VD->getCanonicalDecl(); 10323 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 10324 return Address::invalid(); 10325 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 10326 // Use the default allocation. 
10327 if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 10328 !AA->getAllocator()) 10329 return Address::invalid(); 10330 llvm::Value *Size; 10331 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 10332 if (CVD->getType()->isVariablyModifiedType()) { 10333 Size = CGF.getTypeSize(CVD->getType()); 10334 // Align the size: ((size + align - 1) / align) * align 10335 Size = CGF.Builder.CreateNUWAdd( 10336 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 10337 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 10338 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 10339 } else { 10340 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 10341 Size = CGM.getSize(Sz.alignTo(Align)); 10342 } 10343 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 10344 assert(AA->getAllocator() && 10345 "Expected allocator expression for non-default allocator."); 10346 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 10347 // According to the standard, the original allocator type is a enum (integer). 10348 // Convert to pointer type, if required. 
// NOTE(review): the lines below are the tail of a function whose header is
// above this chunk (it emits an __kmpc_alloc call plus a matching __kmpc_free
// cleanup for a privatized variable — presumably getAddressOfLocalVariable;
// confirm against the full file). Left byte-for-byte unchanged.
  // Normalize the allocator value to void* regardless of whether the clause
  // expression produced an integer or a pointer.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  // Allocate the storage via the OpenMP runtime: __kmpc_alloc(tid, size,
  // allocator) returns an untyped (void*) address.
  llvm::Value *Addr =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
                          CVD->getName() + ".void.addr");
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);

  // Register a cleanup so __kmpc_free runs on both normal scope exit and
  // exception unwinding, pairing every allocation with its release.
  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the raw runtime pointer to a pointer to the variable's actual type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      CVD->getName() + ".addr");
  return Address(Addr, Align);
}

//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime: runtime support for -fopenmp-simd.
//
// In SIMD-only mode no OpenMP runtime library calls may be generated; only
// 'simd'-related directives are honored. Every entry point that would require
// the runtime therefore aborts with llvm_unreachable — Sema/CodeGen are
// expected never to reach these in SIMD-only compilations. The few methods
// with real bodies (emitReduction, emitTargetGlobal, emitRegistrationFunction)
// are documented individually below.
//===----------------------------------------------------------------------===//

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Reductions ARE supported in SIMD-only mode, but only the "simple"
/// (runtime-free) flavor: assert that and delegate to the base
/// CGOpenMPRuntime implementation, which handles simple reductions without
/// emitting runtime calls.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         llvm::Function *OutlinedFn,
                                         llvm::Value *OutlinedFnID,
                                         const Expr *IfCond,
                                         const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// No global is ever handled specially for a device target in SIMD-only
/// mode; returning false tells the caller to emit GD normally.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

/// No offloading descriptor registration is needed in SIMD-only mode, so
/// there is no registration function to emit.
llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
  return nullptr;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}