//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/Basic/BitmaskEnum.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;

namespace {
/// Base class for handling code generation inside OpenMP regions.
///
/// Stores the region kind, the code generation callback, the directive kind
/// and the cancellation flag handed to its constructors, and declares the
/// virtual interface (thread id variable, untied switch emission) that the
/// derived region-info classes override.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Creates region info bound to the captured statement \p CS.
  /// \param CS Captured statement forwarded to CGCapturedStmtInfo.
  /// \param RegionKind Kind later reported by getRegionKind().
  /// \param CodeGen Code generation callback stored for this region.
  /// \param Kind Directive kind later reported by getDirectiveKind().
  /// \param HasCancel Value later reported by hasCancel().
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Creates region info that is not bound to a captured statement; only the
  /// CR_OpenMP kind is passed to CGCapturedStmtInfo. The remaining parameters
  /// have the same meaning as in the constructor above.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Hook for emitting a switching point of an untied task. The base
  /// implementation does nothing.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// \return The region kind supplied at construction.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// \return The directive kind supplied at construction.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// \return The cancellation flag supplied at construction.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI support: true when \p Info reports the CR_OpenMP kind.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of this region.
  CGOpenMPRegionKind RegionKind;
  /// Code generation callback for the region.
  RegionCodeGenTy CodeGen;
  /// Directive kind associated with the region.
  OpenMPDirectiveKind Kind;
  /// Cancellation flag supplied at construction.
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
/// This variant always uses the ParallelOutlinedRegion kind and carries the
/// thread id variable and the helper name supplied by the caller.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Variable holding the global thread id; must not be
  /// null (asserted).
  /// \param HelperName Name returned by getHelperName(). It is stored as a
  /// StringRef, so the underlying storage is not copied.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI support: true for OpenMP region infos whose region kind
  /// is ParallelOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the capture helper (non-owning view).
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
/// This variant always uses the TaskOutlinedRegion kind.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the part-id based switch for untied tasks.
  /// When constructed with Tied == true all of its emission methods are
  /// no-ops.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True when the task is untied (negation of the constructor's Tied).
    bool Untied;
    /// Variable through which the part id is loaded and stored; it is
    /// treated as a pointer (see the castAs<PointerType>() calls below).
    const VarDecl *PartIDVar;
    /// Extra code generation invoked at every switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; created in Enter() for untied tasks only.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}

    /// For untied tasks, emits the switch over the loaded part id, registers
    /// the entry block as case 0 and emits the first switching point.
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // The switch's default destination branches through the cleanups to
        // the return block.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 continues in a fresh block.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }

    /// For untied tasks, emits one switching point: stores the current number
    /// of switch cases as the part id, runs UntiedCodeGen, branches to the
    /// return block, and registers a new switch case that jumps to the code
    /// following this point.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The stored value equals the case number added a few lines below,
        // because no case is added in between.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }

    /// \return Number of cases currently registered in the switch.
    /// NOTE(review): dereferences UntiedSwitch unconditionally; it is only
    /// set by Enter() for untied tasks, so callers presumably must not call
    /// this for tied tasks or before Enter() - confirm against the callers.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };

  /// \param ThreadIDVar Variable holding the global thread id; must not be
  /// null (asserted).
  /// \param Action Untied-task action. It is stored by reference, so the
  /// referenced object has to outlive this region info.
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Forwards to the stored untied-task action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// LLVM-style RTTI support: true for OpenMP region infos whose region kind
  /// is TaskOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
///
/// Most queries are forwarded to the enclosing OpenMP region info when there
/// is one (OuterRegionInfo); otherwise they return null or are unreachable,
/// as spelled out per method below.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI Captured statement info that was active before this
  /// region; may be null. OuterRegionInfo is set to it only when it is an
  /// OpenMP region info (dyn_cast_or_null).
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Sets the context value on the outer region; unreachable without one.
  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  /// \return The outer region's 'this' capture field, or null when there is
  /// no outer region.
  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    // Unlike the other forwarders this consults OldCSI (any captured
    // statement info), not the OpenMP-only member; the local variable
    // shadows the OuterRegionInfo member within this statement.
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  /// Forwards to the outer region when there is one; otherwise does nothing.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// \return The captured statement info supplied at construction.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  /// LLVM-style RTTI support: true for OpenMP region infos whose region kind
  /// is InlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI viewed as an OpenMP region info, or null when it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs.
For this captures, implicit parameters are used instead of the 304 /// captured fields. The name of the target region has to be unique in a given 305 /// application so it is provided by the client, because only the client has 306 /// the information to generate that. 307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 308 public: 309 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 310 const RegionCodeGenTy &CodeGen, StringRef HelperName) 311 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 312 /*HasCancel=*/false), 313 HelperName(HelperName) {} 314 315 /// This is unused for target regions because each starts executing 316 /// with a single thread. 317 const VarDecl *getThreadIDVariable() const override { return nullptr; } 318 319 /// Get the name of the capture helper. 320 StringRef getHelperName() const override { return HelperName; } 321 322 static bool classof(const CGCapturedStmtInfo *Info) { 323 return CGOpenMPRegionInfo::classof(Info) && 324 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 325 } 326 327 private: 328 StringRef HelperName; 329 }; 330 331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 332 llvm_unreachable("No codegen for expressions"); 333 } 334 /// API for generation of expressions captured in a innermost OpenMP 335 /// region. 336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 337 public: 338 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 339 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 340 OMPD_unknown, 341 /*HasCancel=*/false), 342 PrivScope(CGF) { 343 // Make sure the globals captured in the provided statement are local by 344 // using the privatization logic. We assume the same variable is not 345 // captured more than once. 
346 for (const auto &C : CS.captures()) { 347 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 348 continue; 349 350 const VarDecl *VD = C.getCapturedVar(); 351 if (VD->isLocalVarDeclOrParm()) 352 continue; 353 354 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 355 /*RefersToEnclosingVariableOrCapture=*/false, 356 VD->getType().getNonReferenceType(), VK_LValue, 357 C.getLocation()); 358 PrivScope.addPrivate( 359 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); }); 360 } 361 (void)PrivScope.Privatize(); 362 } 363 364 /// Lookup the captured field decl for a variable. 365 const FieldDecl *lookup(const VarDecl *VD) const override { 366 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 367 return FD; 368 return nullptr; 369 } 370 371 /// Emit the captured statement body. 372 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 373 llvm_unreachable("No body for expressions"); 374 } 375 376 /// Get a variable or parameter for storing global thread id 377 /// inside OpenMP construct. 378 const VarDecl *getThreadIDVariable() const override { 379 llvm_unreachable("No thread id for expressions"); 380 } 381 382 /// Get the name of the capture helper. 383 StringRef getHelperName() const override { 384 llvm_unreachable("No helper name for expressions"); 385 } 386 387 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 388 389 private: 390 /// Private scope to capture global variables. 391 CodeGenFunction::OMPPrivateScope PrivScope; 392 }; 393 394 /// RAII for emitting code of OpenMP constructs. 395 class InlinedOpenMPRegionRAII { 396 CodeGenFunction &CGF; 397 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 398 FieldDecl *LambdaThisCaptureField = nullptr; 399 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 400 401 public: 402 /// Constructs region for combined constructs. 403 /// \param CodeGen Code generation sequence for combined directives. 
Includes 404 /// a list of functions used for code generation of implicitly inlined 405 /// regions. 406 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 407 OpenMPDirectiveKind Kind, bool HasCancel) 408 : CGF(CGF) { 409 // Start emission for the construct. 410 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 411 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 412 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 413 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 414 CGF.LambdaThisCaptureField = nullptr; 415 BlockInfo = CGF.BlockInfo; 416 CGF.BlockInfo = nullptr; 417 } 418 419 ~InlinedOpenMPRegionRAII() { 420 // Restore original CapturedStmtInfo only if we're done with code emission. 421 auto *OldCSI = 422 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 423 delete CGF.CapturedStmtInfo; 424 CGF.CapturedStmtInfo = OldCSI; 425 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 426 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 427 CGF.BlockInfo = BlockInfo; 428 } 429 }; 430 431 /// Values for bit flags used in the ident_t to describe the fields. 432 /// All enumeric elements are named and described in accordance with the code 433 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 434 enum OpenMPLocationFlags : unsigned { 435 /// Use trampoline for internal microtask. 436 OMP_IDENT_IMD = 0x01, 437 /// Use c-style ident structure. 438 OMP_IDENT_KMPC = 0x02, 439 /// Atomic reduction option for kmpc_reduce. 440 OMP_ATOMIC_REDUCE = 0x10, 441 /// Explicit 'barrier' directive. 442 OMP_IDENT_BARRIER_EXPL = 0x20, 443 /// Implicit barrier in code. 444 OMP_IDENT_BARRIER_IMPL = 0x40, 445 /// Implicit barrier in 'for' directive. 446 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 447 /// Implicit barrier in 'sections' directive. 448 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 449 /// Implicit barrier in 'single' directive. 
450 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 451 /// Call of __kmp_for_static_init for static loop. 452 OMP_IDENT_WORK_LOOP = 0x200, 453 /// Call of __kmp_for_static_init for sections. 454 OMP_IDENT_WORK_SECTIONS = 0x400, 455 /// Call of __kmp_for_static_init for distribute. 456 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 457 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 458 }; 459 460 /// Describes ident structure that describes a source location. 461 /// All descriptions are taken from 462 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 463 /// Original structure: 464 /// typedef struct ident { 465 /// kmp_int32 reserved_1; /**< might be used in Fortran; 466 /// see above */ 467 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 468 /// KMP_IDENT_KMPC identifies this union 469 /// member */ 470 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 471 /// see above */ 472 ///#if USE_ITT_BUILD 473 /// /* but currently used for storing 474 /// region-specific ITT */ 475 /// /* contextual information. */ 476 ///#endif /* USE_ITT_BUILD */ 477 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 478 /// C++ */ 479 /// char const *psource; /**< String describing the source location. 480 /// The string is composed of semi-colon separated 481 // fields which describe the source file, 482 /// the function and a pair of line numbers that 483 /// delimit the construct. 484 /// */ 485 /// } ident_t; 486 enum IdentFieldIndex { 487 /// might be used in Fortran 488 IdentField_Reserved_1, 489 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 490 IdentField_Flags, 491 /// Not really used in Fortran any more 492 IdentField_Reserved_2, 493 /// Source[4] in Fortran, do not use for C++ 494 IdentField_Reserved_3, 495 /// String describing the source location. 
The string is composed of 496 /// semi-colon separated fields which describe the source file, the function 497 /// and a pair of line numbers that delimit the construct. 498 IdentField_PSource 499 }; 500 501 /// Schedule types for 'omp for' loops (these enumerators are taken from 502 /// the enum sched_type in kmp.h). 503 enum OpenMPSchedType { 504 /// Lower bound for default (unordered) versions. 505 OMP_sch_lower = 32, 506 OMP_sch_static_chunked = 33, 507 OMP_sch_static = 34, 508 OMP_sch_dynamic_chunked = 35, 509 OMP_sch_guided_chunked = 36, 510 OMP_sch_runtime = 37, 511 OMP_sch_auto = 38, 512 /// static with chunk adjustment (e.g., simd) 513 OMP_sch_static_balanced_chunked = 45, 514 /// Lower bound for 'ordered' versions. 515 OMP_ord_lower = 64, 516 OMP_ord_static_chunked = 65, 517 OMP_ord_static = 66, 518 OMP_ord_dynamic_chunked = 67, 519 OMP_ord_guided_chunked = 68, 520 OMP_ord_runtime = 69, 521 OMP_ord_auto = 70, 522 OMP_sch_default = OMP_sch_static, 523 /// dist_schedule types 524 OMP_dist_sch_static_chunked = 91, 525 OMP_dist_sch_static = 92, 526 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 527 /// Set if the monotonic schedule modifier was present. 528 OMP_sch_modifier_monotonic = (1 << 29), 529 /// Set if the nonmonotonic schedule modifier was present. 
530 OMP_sch_modifier_nonmonotonic = (1 << 30), 531 }; 532 533 enum OpenMPRTLFunction { 534 /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, 535 /// kmpc_micro microtask, ...); 536 OMPRTL__kmpc_fork_call, 537 /// Call to void *__kmpc_threadprivate_cached(ident_t *loc, 538 /// kmp_int32 global_tid, void *data, size_t size, void ***cache); 539 OMPRTL__kmpc_threadprivate_cached, 540 /// Call to void __kmpc_threadprivate_register( ident_t *, 541 /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 542 OMPRTL__kmpc_threadprivate_register, 543 // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); 544 OMPRTL__kmpc_global_thread_num, 545 // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 546 // kmp_critical_name *crit); 547 OMPRTL__kmpc_critical, 548 // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 549 // global_tid, kmp_critical_name *crit, uintptr_t hint); 550 OMPRTL__kmpc_critical_with_hint, 551 // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 552 // kmp_critical_name *crit); 553 OMPRTL__kmpc_end_critical, 554 // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 555 // global_tid); 556 OMPRTL__kmpc_cancel_barrier, 557 // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 558 OMPRTL__kmpc_barrier, 559 // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 560 OMPRTL__kmpc_for_static_fini, 561 // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 562 // global_tid); 563 OMPRTL__kmpc_serialized_parallel, 564 // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 565 // global_tid); 566 OMPRTL__kmpc_end_serialized_parallel, 567 // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 568 // kmp_int32 num_threads); 569 OMPRTL__kmpc_push_num_threads, 570 // Call to void __kmpc_flush(ident_t *loc); 571 OMPRTL__kmpc_flush, 572 // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 
global_tid); 573 OMPRTL__kmpc_master, 574 // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); 575 OMPRTL__kmpc_end_master, 576 // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 577 // int end_part); 578 OMPRTL__kmpc_omp_taskyield, 579 // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); 580 OMPRTL__kmpc_single, 581 // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); 582 OMPRTL__kmpc_end_single, 583 // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 584 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 585 // kmp_routine_entry_t *task_entry); 586 OMPRTL__kmpc_omp_task_alloc, 587 // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * 588 // new_task); 589 OMPRTL__kmpc_omp_task, 590 // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 591 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 592 // kmp_int32 didit); 593 OMPRTL__kmpc_copyprivate, 594 // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 595 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 596 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 597 OMPRTL__kmpc_reduce, 598 // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 599 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 600 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 601 // *lck); 602 OMPRTL__kmpc_reduce_nowait, 603 // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 604 // kmp_critical_name *lck); 605 OMPRTL__kmpc_end_reduce, 606 // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 607 // kmp_critical_name *lck); 608 OMPRTL__kmpc_end_reduce_nowait, 609 // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 610 // kmp_task_t * new_task); 611 OMPRTL__kmpc_omp_task_begin_if0, 612 // Call to void 
__kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 613 // kmp_task_t * new_task); 614 OMPRTL__kmpc_omp_task_complete_if0, 615 // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 616 OMPRTL__kmpc_ordered, 617 // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 618 OMPRTL__kmpc_end_ordered, 619 // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 620 // global_tid); 621 OMPRTL__kmpc_omp_taskwait, 622 // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 623 OMPRTL__kmpc_taskgroup, 624 // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 625 OMPRTL__kmpc_end_taskgroup, 626 // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 627 // int proc_bind); 628 OMPRTL__kmpc_push_proc_bind, 629 // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 630 // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t 631 // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 632 OMPRTL__kmpc_omp_task_with_deps, 633 // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 634 // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 635 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 636 OMPRTL__kmpc_omp_wait_deps, 637 // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 638 // global_tid, kmp_int32 cncl_kind); 639 OMPRTL__kmpc_cancellationpoint, 640 // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 641 // kmp_int32 cncl_kind); 642 OMPRTL__kmpc_cancel, 643 // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 644 // kmp_int32 num_teams, kmp_int32 thread_limit); 645 OMPRTL__kmpc_push_num_teams, 646 // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 647 // microtask, ...); 648 OMPRTL__kmpc_fork_teams, 649 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 650 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, 
kmp_int64 st, int nogroup, int 651 // sched, kmp_uint64 grainsize, void *task_dup); 652 OMPRTL__kmpc_taskloop, 653 // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 654 // num_dims, struct kmp_dim *dims); 655 OMPRTL__kmpc_doacross_init, 656 // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 657 OMPRTL__kmpc_doacross_fini, 658 // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 659 // *vec); 660 OMPRTL__kmpc_doacross_post, 661 // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 662 // *vec); 663 OMPRTL__kmpc_doacross_wait, 664 // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void 665 // *data); 666 OMPRTL__kmpc_task_reduction_init, 667 // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 668 // *d); 669 OMPRTL__kmpc_task_reduction_get_th_data, 670 671 // 672 // Offloading related calls 673 // 674 // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 675 // size); 676 OMPRTL__kmpc_push_target_tripcount, 677 // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 678 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 679 // *arg_types); 680 OMPRTL__tgt_target, 681 // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 682 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 683 // *arg_types); 684 OMPRTL__tgt_target_nowait, 685 // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 686 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 687 // *arg_types, int32_t num_teams, int32_t thread_limit); 688 OMPRTL__tgt_target_teams, 689 // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void 690 // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t 691 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 692 OMPRTL__tgt_target_teams_nowait, 693 
// Call to void __tgt_register_lib(__tgt_bin_desc *desc); 694 OMPRTL__tgt_register_lib, 695 // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); 696 OMPRTL__tgt_unregister_lib, 697 // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 698 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 699 OMPRTL__tgt_target_data_begin, 700 // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 701 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 702 // *arg_types); 703 OMPRTL__tgt_target_data_begin_nowait, 704 // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 705 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 706 OMPRTL__tgt_target_data_end, 707 // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t 708 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 709 // *arg_types); 710 OMPRTL__tgt_target_data_end_nowait, 711 // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 712 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 713 OMPRTL__tgt_target_data_update, 714 // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t 715 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 716 // *arg_types); 717 OMPRTL__tgt_target_data_update_nowait, 718 }; 719 720 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 721 /// region. 
722 class CleanupTy final : public EHScopeStack::Cleanup { 723 PrePostActionTy *Action; 724 725 public: 726 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 727 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 728 if (!CGF.HaveInsertPoint()) 729 return; 730 Action->Exit(CGF); 731 } 732 }; 733 734 } // anonymous namespace 735 736 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 737 CodeGenFunction::RunCleanupsScope Scope(CGF); 738 if (PrePostAction) { 739 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 740 Callback(CodeGen, CGF, *PrePostAction); 741 } else { 742 PrePostActionTy Action; 743 Callback(CodeGen, CGF, Action); 744 } 745 } 746 747 /// Check if the combiner is a call to UDR combiner and if it is so return the 748 /// UDR decl used for reduction. 749 static const OMPDeclareReductionDecl * 750 getReductionInit(const Expr *ReductionOp) { 751 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 752 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 753 if (const auto *DRE = 754 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 755 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 756 return DRD; 757 return nullptr; 758 } 759 760 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 761 const OMPDeclareReductionDecl *DRD, 762 const Expr *InitOp, 763 Address Private, Address Original, 764 QualType Ty) { 765 if (DRD->getInitializer()) { 766 std::pair<llvm::Function *, llvm::Function *> Reduction = 767 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 768 const auto *CE = cast<CallExpr>(InitOp); 769 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 770 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 771 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 772 const auto *LHSDRE = 773 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 774 const auto *RHSDRE = 775 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 776 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 777 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 778 [=]() { return Private; }); 779 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 780 [=]() { return Original; }); 781 (void)PrivateScope.Privatize(); 782 RValue Func = RValue::get(Reduction.second); 783 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 784 CGF.EmitIgnoredExpr(InitOp); 785 } else { 786 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 787 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 788 auto *GV = new llvm::GlobalVariable( 789 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 790 llvm::GlobalValue::PrivateLinkage, Init, Name); 791 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 792 RValue InitRVal; 793 switch (CGF.getEvaluationKind(Ty)) { 794 case TEK_Scalar: 795 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 796 break; 797 case TEK_Complex: 798 InitRVal = 799 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 800 break; 801 case TEK_Aggregate: 802 InitRVal = RValue::getAggregate(LV.getAddress()); 803 break; 804 } 805 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 806 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 807 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 808 /*IsInitializer=*/false); 809 } 810 } 811 812 /// Emit initialization of arrays of complex types. 813 /// \param DestAddr Address of the array. 814 /// \param Type Type of array. 815 /// \param Init Initial expression of array. 816 /// \param SrcAddr Address of the original array. 817 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 818 QualType Type, bool EmitDeclareReductionInit, 819 const Expr *Init, 820 const OMPDeclareReductionDecl *DRD, 821 Address SrcAddr = Address::invalid()) { 822 // Perform element-by-element initialization. 
823 QualType ElementTy; 824 825 // Drill down to the base element type on both arrays. 826 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 827 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 828 DestAddr = 829 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 830 if (DRD) 831 SrcAddr = 832 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 833 834 llvm::Value *SrcBegin = nullptr; 835 if (DRD) 836 SrcBegin = SrcAddr.getPointer(); 837 llvm::Value *DestBegin = DestAddr.getPointer(); 838 // Cast from pointer to array type to pointer to single element. 839 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 840 // The basic structure here is a while-do loop. 841 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 842 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 843 llvm::Value *IsEmpty = 844 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 845 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 846 847 // Enter the loop body, making that address the current address. 
848 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 849 CGF.EmitBlock(BodyBB); 850 851 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 852 853 llvm::PHINode *SrcElementPHI = nullptr; 854 Address SrcElementCurrent = Address::invalid(); 855 if (DRD) { 856 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 857 "omp.arraycpy.srcElementPast"); 858 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 859 SrcElementCurrent = 860 Address(SrcElementPHI, 861 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 862 } 863 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 864 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 865 DestElementPHI->addIncoming(DestBegin, EntryBB); 866 Address DestElementCurrent = 867 Address(DestElementPHI, 868 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 869 870 // Emit copy. 871 { 872 CodeGenFunction::RunCleanupsScope InitScope(CGF); 873 if (EmitDeclareReductionInit) { 874 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 875 SrcElementCurrent, ElementTy); 876 } else 877 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 878 /*IsInitializer=*/false); 879 } 880 881 if (DRD) { 882 // Shift the address forward by one element. 883 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 884 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 885 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 886 } 887 888 // Shift the address forward by one element. 889 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 890 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 891 // Check whether we've reached the end. 892 llvm::Value *Done = 893 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 894 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 895 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 896 897 // Done. 
898 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 899 } 900 901 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 902 return CGF.EmitOMPSharedLValue(E); 903 } 904 905 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 906 const Expr *E) { 907 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 908 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 909 return LValue(); 910 } 911 912 void ReductionCodeGen::emitAggregateInitialization( 913 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 914 const OMPDeclareReductionDecl *DRD) { 915 // Emit VarDecl with copy init for arrays. 916 // Get the address of the original variable captured in current 917 // captured region. 918 const auto *PrivateVD = 919 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 920 bool EmitDeclareReductionInit = 921 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 922 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 923 EmitDeclareReductionInit, 924 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 925 : PrivateVD->getInit(), 926 DRD, SharedLVal.getAddress()); 927 } 928 929 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 930 ArrayRef<const Expr *> Privates, 931 ArrayRef<const Expr *> ReductionOps) { 932 ClausesData.reserve(Shareds.size()); 933 SharedAddresses.reserve(Shareds.size()); 934 Sizes.reserve(Shareds.size()); 935 BaseDecls.reserve(Shareds.size()); 936 auto IPriv = Privates.begin(); 937 auto IRed = ReductionOps.begin(); 938 for (const Expr *Ref : Shareds) { 939 ClausesData.emplace_back(Ref, *IPriv, *IRed); 940 std::advance(IPriv, 1); 941 std::advance(IRed, 1); 942 } 943 } 944 945 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { 946 assert(SharedAddresses.size() == N && 947 "Number of generated lvalues must be exactly N."); 948 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 949 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 950 SharedAddresses.emplace_back(First, Second); 951 } 952 953 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 954 const auto *PrivateVD = 955 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 956 QualType PrivateType = PrivateVD->getType(); 957 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 958 if (!PrivateType->isVariablyModifiedType()) { 959 Sizes.emplace_back( 960 CGF.getTypeSize( 961 SharedAddresses[N].first.getType().getNonReferenceType()), 962 nullptr); 963 return; 964 } 965 llvm::Value *Size; 966 llvm::Value *SizeInChars; 967 auto *ElemType = 968 cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType()) 969 ->getElementType(); 970 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 971 if (AsArraySection) { 972 Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(), 973 SharedAddresses[N].first.getPointer()); 974 Size = CGF.Builder.CreateNUWAdd( 975 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 976 SizeInChars 
= CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 977 } else { 978 SizeInChars = CGF.getTypeSize( 979 SharedAddresses[N].first.getType().getNonReferenceType()); 980 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 981 } 982 Sizes.emplace_back(SizeInChars, Size); 983 CodeGenFunction::OpaqueValueMapping OpaqueMap( 984 CGF, 985 cast<OpaqueValueExpr>( 986 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 987 RValue::get(Size)); 988 CGF.EmitVariablyModifiedType(PrivateType); 989 } 990 991 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 992 llvm::Value *Size) { 993 const auto *PrivateVD = 994 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 995 QualType PrivateType = PrivateVD->getType(); 996 if (!PrivateType->isVariablyModifiedType()) { 997 assert(!Size && !Sizes[N].second && 998 "Size should be nullptr for non-variably modified reduction " 999 "items."); 1000 return; 1001 } 1002 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1003 CGF, 1004 cast<OpaqueValueExpr>( 1005 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1006 RValue::get(Size)); 1007 CGF.EmitVariablyModifiedType(PrivateType); 1008 } 1009 1010 void ReductionCodeGen::emitInitialization( 1011 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 1012 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 1013 assert(SharedAddresses.size() > N && "No variable was generated"); 1014 const auto *PrivateVD = 1015 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1016 const OMPDeclareReductionDecl *DRD = 1017 getReductionInit(ClausesData[N].ReductionOp); 1018 QualType PrivateType = PrivateVD->getType(); 1019 PrivateAddr = CGF.Builder.CreateElementBitCast( 1020 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1021 QualType SharedType = SharedAddresses[N].first.getType(); 1022 SharedLVal = CGF.MakeAddrLValue( 1023 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(), 1024 
CGF.ConvertTypeForMem(SharedType)), 1025 SharedType, SharedAddresses[N].first.getBaseInfo(), 1026 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 1027 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 1028 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 1029 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 1030 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 1031 PrivateAddr, SharedLVal.getAddress(), 1032 SharedLVal.getType()); 1033 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 1034 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 1035 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 1036 PrivateVD->getType().getQualifiers(), 1037 /*IsInitializer=*/false); 1038 } 1039 } 1040 1041 bool ReductionCodeGen::needCleanups(unsigned N) { 1042 const auto *PrivateVD = 1043 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1044 QualType PrivateType = PrivateVD->getType(); 1045 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1046 return DTorKind != QualType::DK_none; 1047 } 1048 1049 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 1050 Address PrivateAddr) { 1051 const auto *PrivateVD = 1052 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1053 QualType PrivateType = PrivateVD->getType(); 1054 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1055 if (needCleanups(N)) { 1056 PrivateAddr = CGF.Builder.CreateElementBitCast( 1057 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1058 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 1059 } 1060 } 1061 1062 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1063 LValue BaseLV) { 1064 BaseTy = BaseTy.getNonReferenceType(); 1065 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1066 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1067 if (const auto *PtrTy = 
BaseTy->getAs<PointerType>()) { 1068 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); 1069 } else { 1070 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy); 1071 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 1072 } 1073 BaseTy = BaseTy->getPointeeType(); 1074 } 1075 return CGF.MakeAddrLValue( 1076 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(), 1077 CGF.ConvertTypeForMem(ElTy)), 1078 BaseLV.getType(), BaseLV.getBaseInfo(), 1079 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 1080 } 1081 1082 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1083 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 1084 llvm::Value *Addr) { 1085 Address Tmp = Address::invalid(); 1086 Address TopTmp = Address::invalid(); 1087 Address MostTopTmp = Address::invalid(); 1088 BaseTy = BaseTy.getNonReferenceType(); 1089 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1090 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1091 Tmp = CGF.CreateMemTemp(BaseTy); 1092 if (TopTmp.isValid()) 1093 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 1094 else 1095 MostTopTmp = Tmp; 1096 TopTmp = Tmp; 1097 BaseTy = BaseTy->getPointeeType(); 1098 } 1099 llvm::Type *Ty = BaseLVType; 1100 if (Tmp.isValid()) 1101 Ty = Tmp.getElementType(); 1102 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 1103 if (Tmp.isValid()) { 1104 CGF.Builder.CreateStore(Addr, Tmp); 1105 return MostTopTmp; 1106 } 1107 return Address(Addr, BaseLVAlignment); 1108 } 1109 1110 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 1111 const VarDecl *OrigVD = nullptr; 1112 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 1113 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 1114 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 1115 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 1116 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1117 Base = 
TempASE->getBase()->IgnoreParenImpCasts(); 1118 DE = cast<DeclRefExpr>(Base); 1119 OrigVD = cast<VarDecl>(DE->getDecl()); 1120 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 1121 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 1122 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1123 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1124 DE = cast<DeclRefExpr>(Base); 1125 OrigVD = cast<VarDecl>(DE->getDecl()); 1126 } 1127 return OrigVD; 1128 } 1129 1130 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1131 Address PrivateAddr) { 1132 const DeclRefExpr *DE; 1133 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1134 BaseDecls.emplace_back(OrigVD); 1135 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1136 LValue BaseLValue = 1137 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1138 OriginalBaseLValue); 1139 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1140 BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); 1141 llvm::Value *PrivatePointer = 1142 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1143 PrivateAddr.getPointer(), 1144 SharedAddresses[N].first.getAddress().getType()); 1145 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1146 return castToBase(CGF, OrigVD->getType(), 1147 SharedAddresses[N].first.getType(), 1148 OriginalBaseLValue.getAddress().getType(), 1149 OriginalBaseLValue.getAlignment(), Ptr); 1150 } 1151 BaseDecls.emplace_back( 1152 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1153 return PrivateAddr; 1154 } 1155 1156 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1157 const OMPDeclareReductionDecl *DRD = 1158 getReductionInit(ClausesData[N].ReductionOp); 1159 return DRD && DRD->getInitializer(); 1160 } 1161 1162 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1163 return CGF.EmitLoadOfPointerLValue( 1164 
CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1165 getThreadIDVariable()->getType()->castAs<PointerType>()); 1166 } 1167 1168 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1169 if (!CGF.HaveInsertPoint()) 1170 return; 1171 // 1.2.2 OpenMP Language Terminology 1172 // Structured block - An executable statement with a single entry at the 1173 // top and a single exit at the bottom. 1174 // The point of exit cannot be a branch out of the structured block. 1175 // longjmp() and throw() must not violate the entry/exit criteria. 1176 CGF.EHStack.pushTerminate(); 1177 CodeGen(CGF); 1178 CGF.EHStack.popTerminate(); 1179 } 1180 1181 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1182 CodeGenFunction &CGF) { 1183 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1184 getThreadIDVariable()->getType(), 1185 AlignmentSource::Decl); 1186 } 1187 1188 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1189 QualType FieldTy) { 1190 auto *Field = FieldDecl::Create( 1191 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1192 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1193 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1194 Field->setAccess(AS_public); 1195 DC->addDecl(Field); 1196 return Field; 1197 } 1198 1199 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1200 StringRef Separator) 1201 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1202 OffloadEntriesInfoManager(CGM) { 1203 ASTContext &C = CGM.getContext(); 1204 RecordDecl *RD = C.buildImplicitRecord("ident_t"); 1205 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1206 RD->startDefinition(); 1207 // reserved_1 1208 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1209 // flags 1210 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1211 // reserved_2 1212 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1213 // reserved_3 1214 
addFieldToRecordDecl(C, RD, KmpInt32Ty); 1215 // psource 1216 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1217 RD->completeDefinition(); 1218 IdentQTy = C.getRecordType(RD); 1219 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); 1220 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1221 1222 loadOffloadInfoMetadata(); 1223 } 1224 1225 void CGOpenMPRuntime::clear() { 1226 InternalVars.clear(); 1227 // Clean non-target variable declarations possibly used only in debug info. 1228 for (const auto &Data : EmittedNonTargetVariables) { 1229 if (!Data.getValue().pointsToAliveValue()) 1230 continue; 1231 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1232 if (!GV) 1233 continue; 1234 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1235 continue; 1236 GV->eraseFromParent(); 1237 } 1238 } 1239 1240 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1241 SmallString<128> Buffer; 1242 llvm::raw_svector_ostream OS(Buffer); 1243 StringRef Sep = FirstSeparator; 1244 for (StringRef Part : Parts) { 1245 OS << Sep << Part; 1246 Sep = Separator; 1247 } 1248 return OS.str(); 1249 } 1250 1251 static llvm::Function * 1252 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1253 const Expr *CombinerInitializer, const VarDecl *In, 1254 const VarDecl *Out, bool IsCombiner) { 1255 // void .omp_combiner.(Ty *in, Ty *out); 1256 ASTContext &C = CGM.getContext(); 1257 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1258 FunctionArgList Args; 1259 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1260 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1261 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1262 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1263 Args.push_back(&OmpOutParm); 1264 Args.push_back(&OmpInParm); 1265 const CGFunctionInfo &FnInfo = 1266 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1267 llvm::FunctionType *FnTy = 
CGM.getTypes().GetFunctionType(FnInfo); 1268 std::string Name = CGM.getOpenMPRuntime().getName( 1269 {IsCombiner ? "omp_combiner" : "omp_initializer", ""}); 1270 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1271 Name, &CGM.getModule()); 1272 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1273 Fn->removeFnAttr(llvm::Attribute::NoInline); 1274 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1275 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1276 CodeGenFunction CGF(CGM); 1277 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1278 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1279 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1280 Out->getLocation()); 1281 CodeGenFunction::OMPPrivateScope Scope(CGF); 1282 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1283 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1284 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1285 .getAddress(); 1286 }); 1287 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1288 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1289 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1290 .getAddress(); 1291 }); 1292 (void)Scope.Privatize(); 1293 if (!IsCombiner && Out->hasInit() && 1294 !CGF.isTrivialInitializer(Out->getInit())) { 1295 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1296 Out->getType().getQualifiers(), 1297 /*IsInitializer=*/true); 1298 } 1299 if (CombinerInitializer) 1300 CGF.EmitIgnoredExpr(CombinerInitializer); 1301 Scope.ForceCleanup(); 1302 CGF.FinishFunction(); 1303 return Fn; 1304 } 1305 1306 void CGOpenMPRuntime::emitUserDefinedReduction( 1307 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1308 if (UDRMap.count(D) > 0) 1309 return; 1310 llvm::Function *Combiner = emitCombinerOrInitializer( 1311 CGM, D->getType(), D->getCombiner(), 1312 
cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1313 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1314 /*IsCombiner=*/true); 1315 llvm::Function *Initializer = nullptr; 1316 if (const Expr *Init = D->getInitializer()) { 1317 Initializer = emitCombinerOrInitializer( 1318 CGM, D->getType(), 1319 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1320 : nullptr, 1321 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1322 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1323 /*IsCombiner=*/false); 1324 } 1325 UDRMap.try_emplace(D, Combiner, Initializer); 1326 if (CGF) { 1327 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1328 Decls.second.push_back(D); 1329 } 1330 } 1331 1332 std::pair<llvm::Function *, llvm::Function *> 1333 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1334 auto I = UDRMap.find(D); 1335 if (I != UDRMap.end()) 1336 return I->second; 1337 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1338 return UDRMap.lookup(D); 1339 } 1340 1341 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1342 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1343 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1344 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1345 assert(ThreadIDVar->getType()->isPointerType() && 1346 "thread id variable must be of type kmp_int32 *"); 1347 CodeGenFunction CGF(CGM, true); 1348 bool HasCancel = false; 1349 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1350 HasCancel = OPD->hasCancel(); 1351 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1352 HasCancel = OPSD->hasCancel(); 1353 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1354 HasCancel = OPFD->hasCancel(); 1355 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1356 HasCancel = OPFD->hasCancel(); 1357 else if (const 
auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1358 HasCancel = OPFD->hasCancel(); 1359 else if (const auto *OPFD = 1360 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1361 HasCancel = OPFD->hasCancel(); 1362 else if (const auto *OPFD = 1363 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1364 HasCancel = OPFD->hasCancel(); 1365 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1366 HasCancel, OutlinedHelperName); 1367 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1368 return CGF.GenerateOpenMPCapturedStmtFunction(*CS); 1369 } 1370 1371 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1372 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1373 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1374 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1375 return emitParallelOrTeamsOutlinedFunction( 1376 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1377 } 1378 1379 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1380 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1381 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1382 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1383 return emitParallelOrTeamsOutlinedFunction( 1384 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1385 } 1386 1387 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1388 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1389 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1390 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1391 bool Tied, unsigned &NumberOfParts) { 1392 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1393 PrePostActionTy &) { 1394 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1395 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1396 llvm::Value *TaskArgs[] = { 
1397 UpLoc, ThreadID, 1398 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1399 TaskTVar->getType()->castAs<PointerType>()) 1400 .getPointer()}; 1401 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1402 }; 1403 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1404 UntiedCodeGen); 1405 CodeGen.setAction(Action); 1406 assert(!ThreadIDVar->getType()->isPointerType() && 1407 "thread id variable must be of type kmp_int32 for tasks"); 1408 const OpenMPDirectiveKind Region = 1409 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1410 : OMPD_task; 1411 const CapturedStmt *CS = D.getCapturedStmt(Region); 1412 const auto *TD = dyn_cast<OMPTaskDirective>(&D); 1413 CodeGenFunction CGF(CGM, true); 1414 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1415 InnermostKind, 1416 TD ? TD->hasCancel() : false, Action); 1417 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1418 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1419 if (!Tied) 1420 NumberOfParts = Action.getNumberOfParts(); 1421 return Res; 1422 } 1423 1424 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1425 const RecordDecl *RD, const CGRecordLayout &RL, 1426 ArrayRef<llvm::Constant *> Data) { 1427 llvm::StructType *StructTy = RL.getLLVMType(); 1428 unsigned PrevIdx = 0; 1429 ConstantInitBuilder CIBuilder(CGM); 1430 auto DI = Data.begin(); 1431 for (const FieldDecl *FD : RD->fields()) { 1432 unsigned Idx = RL.getLLVMFieldNo(FD); 1433 // Fill the alignment. 1434 for (unsigned I = PrevIdx; I < Idx; ++I) 1435 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1436 PrevIdx = Idx + 1; 1437 Fields.add(*DI); 1438 ++DI; 1439 } 1440 } 1441 1442 template <class... As> 1443 static llvm::GlobalVariable * 1444 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1445 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1446 As &&... 
Args) { 1447 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1448 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1449 ConstantInitBuilder CIBuilder(CGM); 1450 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1451 buildStructValue(Fields, CGM, RD, RL, Data); 1452 return Fields.finishAndCreateGlobal( 1453 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1454 std::forward<As>(Args)...); 1455 } 1456 1457 template <typename T> 1458 static void 1459 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1460 ArrayRef<llvm::Constant *> Data, 1461 T &Parent) { 1462 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1463 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1464 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1465 buildStructValue(Fields, CGM, RD, RL, Data); 1466 Fields.finishAndAddTo(Parent); 1467 } 1468 1469 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1470 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1471 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1472 FlagsTy FlagsKey(Flags, Reserved2Flags); 1473 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); 1474 if (!Entry) { 1475 if (!DefaultOpenMPPSource) { 1476 // Initialize default location for psource field of ident_t structure of 1477 // all ident_t objects. Format is ";file;function;line;column;;". 
1478 // Taken from 1479 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp 1480 DefaultOpenMPPSource = 1481 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 1482 DefaultOpenMPPSource = 1483 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 1484 } 1485 1486 llvm::Constant *Data[] = { 1487 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1488 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 1489 llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), 1490 llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; 1491 llvm::GlobalValue *DefaultOpenMPLocation = 1492 createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "", 1493 llvm::GlobalValue::PrivateLinkage); 1494 DefaultOpenMPLocation->setUnnamedAddr( 1495 llvm::GlobalValue::UnnamedAddr::Global); 1496 1497 OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation; 1498 } 1499 return Address(Entry, Align); 1500 } 1501 1502 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1503 bool AtCurrentPoint) { 1504 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1505 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1506 1507 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 1508 if (AtCurrentPoint) { 1509 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1510 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1511 } else { 1512 Elem.second.ServiceInsertPt = 1513 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1514 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1515 } 1516 } 1517 1518 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1519 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1520 if (Elem.second.ServiceInsertPt) { 1521 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1522 Elem.second.ServiceInsertPt = nullptr; 1523 Ptr->eraseFromParent(); 1524 } 1525 } 1526 1527 llvm::Value 
*CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1528 SourceLocation Loc, 1529 unsigned Flags) { 1530 Flags |= OMP_IDENT_KMPC; 1531 // If no debug info is generated - return global default location. 1532 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1533 Loc.isInvalid()) 1534 return getOrCreateDefaultLocation(Flags).getPointer(); 1535 1536 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1537 1538 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1539 Address LocValue = Address::invalid(); 1540 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1541 if (I != OpenMPLocThreadIDMap.end()) 1542 LocValue = Address(I->second.DebugLoc, Align); 1543 1544 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 1545 // GetOpenMPThreadID was called before this routine. 1546 if (!LocValue.isValid()) { 1547 // Generate "ident_t .kmpc_loc.addr;" 1548 Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr"); 1549 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1550 Elem.second.DebugLoc = AI.getPointer(); 1551 LocValue = AI; 1552 1553 if (!Elem.second.ServiceInsertPt) 1554 setLocThreadIdInsertPt(CGF); 1555 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1556 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1557 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 1558 CGF.getTypeSize(IdentQTy)); 1559 } 1560 1561 // char **psource = &.kmpc_loc_<flags>.addr.psource; 1562 LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy); 1563 auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin(); 1564 LValue PSource = 1565 CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource)); 1566 1567 llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 1568 if (OMPDebugLoc == nullptr) { 1569 SmallString<128> Buffer2; 1570 llvm::raw_svector_ostream OS2(Buffer2); 1571 // Build debug location 1572 PresumedLoc PLoc = 
CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1573 OS2 << ";" << PLoc.getFilename() << ";"; 1574 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1575 OS2 << FD->getQualifiedNameAsString(); 1576 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1577 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 1578 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 1579 } 1580 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 1581 CGF.EmitStoreOfScalar(OMPDebugLoc, PSource); 1582 1583 // Our callers always pass this to a runtime function, so for 1584 // convenience, go ahead and return a naked pointer. 1585 return LocValue.getPointer(); 1586 } 1587 1588 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1589 SourceLocation Loc) { 1590 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1591 1592 llvm::Value *ThreadID = nullptr; 1593 // Check whether we've already cached a load of the thread id in this 1594 // function. 1595 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1596 if (I != OpenMPLocThreadIDMap.end()) { 1597 ThreadID = I->second.ThreadID; 1598 if (ThreadID != nullptr) 1599 return ThreadID; 1600 } 1601 // If exceptions are enabled, do not use parameter to avoid possible crash. 1602 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1603 !CGF.getLangOpts().CXXExceptions || 1604 CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 1605 if (auto *OMPRegionInfo = 1606 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1607 if (OMPRegionInfo->getThreadIDVariable()) { 1608 // Check if this an outlined function with thread id passed as argument. 1609 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1610 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1611 // If value loaded in entry block, cache it and use it everywhere in 1612 // function. 
1613 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 1614 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1615 Elem.second.ThreadID = ThreadID; 1616 } 1617 return ThreadID; 1618 } 1619 } 1620 } 1621 1622 // This is not an outlined function region - need to call __kmpc_int32 1623 // kmpc_global_thread_num(ident_t *loc). 1624 // Generate thread id value and cache this value for use across the 1625 // function. 1626 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1627 if (!Elem.second.ServiceInsertPt) 1628 setLocThreadIdInsertPt(CGF); 1629 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1630 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1631 llvm::CallInst *Call = CGF.Builder.CreateCall( 1632 createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1633 emitUpdateLocation(CGF, Loc)); 1634 Call->setCallingConv(CGF.getRuntimeCC()); 1635 Elem.second.ThreadID = Call; 1636 return Call; 1637 } 1638 1639 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1640 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1641 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1642 clearLocThreadIdInsertPt(CGF); 1643 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1644 } 1645 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1646 for(auto *D : FunctionUDRMap[CGF.CurFn]) 1647 UDRMap.erase(D); 1648 FunctionUDRMap.erase(CGF.CurFn); 1649 } 1650 } 1651 1652 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1653 return IdentTy->getPointerTo(); 1654 } 1655 1656 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1657 if (!Kmpc_MicroTy) { 1658 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1659 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1660 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1661 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1662 } 1663 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1664 } 1665 1666 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1667 llvm::FunctionCallee RTLFn = nullptr; 1668 switch (static_cast<OpenMPRTLFunction>(Function)) { 1669 case OMPRTL__kmpc_fork_call: { 1670 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1671 // microtask, ...); 1672 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1673 getKmpc_MicroPointerTy()}; 1674 auto *FnTy = 1675 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1676 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1677 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 1678 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 1679 llvm::LLVMContext &Ctx = F->getContext(); 1680 llvm::MDBuilder MDB(Ctx); 1681 // Annotate the callback behavior of the __kmpc_fork_call: 1682 // - The callback callee is argument number 2 (microtask). 1683 // - The first two arguments of the callback callee are unknown (-1). 1684 // - All variadic arguments to the __kmpc_fork_call are passed to the 1685 // callback callee. 
1686 F->addMetadata( 1687 llvm::LLVMContext::MD_callback, 1688 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1689 2, {-1, -1}, 1690 /* VarArgsArePassed */ true)})); 1691 } 1692 } 1693 break; 1694 } 1695 case OMPRTL__kmpc_global_thread_num: { 1696 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1697 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1698 auto *FnTy = 1699 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1700 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1701 break; 1702 } 1703 case OMPRTL__kmpc_threadprivate_cached: { 1704 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1705 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1706 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1707 CGM.VoidPtrTy, CGM.SizeTy, 1708 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1709 auto *FnTy = 1710 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1711 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1712 break; 1713 } 1714 case OMPRTL__kmpc_critical: { 1715 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1716 // kmp_critical_name *crit); 1717 llvm::Type *TypeParams[] = { 1718 getIdentTyPointerTy(), CGM.Int32Ty, 1719 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1720 auto *FnTy = 1721 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1722 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1723 break; 1724 } 1725 case OMPRTL__kmpc_critical_with_hint: { 1726 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1727 // kmp_critical_name *crit, uintptr_t hint); 1728 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1729 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1730 CGM.IntPtrTy}; 1731 auto *FnTy = 1732 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1733 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__kmpc_critical_with_hint"); 1734 break; 1735 } 1736 case OMPRTL__kmpc_threadprivate_register: { 1737 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1738 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1739 // typedef void *(*kmpc_ctor)(void *); 1740 auto *KmpcCtorTy = 1741 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1742 /*isVarArg*/ false)->getPointerTo(); 1743 // typedef void *(*kmpc_cctor)(void *, void *); 1744 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1745 auto *KmpcCopyCtorTy = 1746 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1747 /*isVarArg*/ false) 1748 ->getPointerTo(); 1749 // typedef void (*kmpc_dtor)(void *); 1750 auto *KmpcDtorTy = 1751 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1752 ->getPointerTo(); 1753 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1754 KmpcCopyCtorTy, KmpcDtorTy}; 1755 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1756 /*isVarArg*/ false); 1757 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1758 break; 1759 } 1760 case OMPRTL__kmpc_end_critical: { 1761 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1762 // kmp_critical_name *crit); 1763 llvm::Type *TypeParams[] = { 1764 getIdentTyPointerTy(), CGM.Int32Ty, 1765 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1766 auto *FnTy = 1767 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1768 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1769 break; 1770 } 1771 case OMPRTL__kmpc_cancel_barrier: { 1772 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1773 // global_tid); 1774 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1775 auto *FnTy = 1776 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1777 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1778 break; 1779 } 1780 case 
OMPRTL__kmpc_barrier: { 1781 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1782 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1783 auto *FnTy = 1784 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1785 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1786 break; 1787 } 1788 case OMPRTL__kmpc_for_static_fini: { 1789 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1790 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1791 auto *FnTy = 1792 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1793 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1794 break; 1795 } 1796 case OMPRTL__kmpc_push_num_threads: { 1797 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1798 // kmp_int32 num_threads) 1799 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1800 CGM.Int32Ty}; 1801 auto *FnTy = 1802 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1803 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1804 break; 1805 } 1806 case OMPRTL__kmpc_serialized_parallel: { 1807 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1808 // global_tid); 1809 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1810 auto *FnTy = 1811 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1812 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1813 break; 1814 } 1815 case OMPRTL__kmpc_end_serialized_parallel: { 1816 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1817 // global_tid); 1818 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1819 auto *FnTy = 1820 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1821 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1822 break; 1823 } 1824 case OMPRTL__kmpc_flush: { 1825 // Build void 
__kmpc_flush(ident_t *loc); 1826 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1827 auto *FnTy = 1828 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1829 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1830 break; 1831 } 1832 case OMPRTL__kmpc_master: { 1833 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1834 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1835 auto *FnTy = 1836 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1837 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1838 break; 1839 } 1840 case OMPRTL__kmpc_end_master: { 1841 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1842 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1843 auto *FnTy = 1844 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1845 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1846 break; 1847 } 1848 case OMPRTL__kmpc_omp_taskyield: { 1849 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1850 // int end_part); 1851 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1852 auto *FnTy = 1853 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1854 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1855 break; 1856 } 1857 case OMPRTL__kmpc_single: { 1858 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1859 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1860 auto *FnTy = 1861 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1862 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1863 break; 1864 } 1865 case OMPRTL__kmpc_end_single: { 1866 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1867 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1868 auto *FnTy = 1869 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1870 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1871 break; 1872 } 1873 case OMPRTL__kmpc_omp_task_alloc: { 1874 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1875 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1876 // kmp_routine_entry_t *task_entry); 1877 assert(KmpRoutineEntryPtrTy != nullptr && 1878 "Type kmp_routine_entry_t must be created."); 1879 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1880 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1881 // Return void * and then cast to particular kmp_task_t type. 1882 auto *FnTy = 1883 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1884 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1885 break; 1886 } 1887 case OMPRTL__kmpc_omp_task: { 1888 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1889 // *new_task); 1890 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1891 CGM.VoidPtrTy}; 1892 auto *FnTy = 1893 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1894 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 1895 break; 1896 } 1897 case OMPRTL__kmpc_copyprivate: { 1898 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 1899 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 1900 // kmp_int32 didit); 1901 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1902 auto *CpyFnTy = 1903 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 1904 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 1905 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 1906 CGM.Int32Ty}; 1907 auto *FnTy = 1908 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1909 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 1910 
break; 1911 } 1912 case OMPRTL__kmpc_reduce: { 1913 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 1914 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 1915 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 1916 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1917 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1918 /*isVarArg=*/false); 1919 llvm::Type *TypeParams[] = { 1920 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1921 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1922 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1923 auto *FnTy = 1924 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1925 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 1926 break; 1927 } 1928 case OMPRTL__kmpc_reduce_nowait: { 1929 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 1930 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 1931 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 1932 // *lck); 1933 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1934 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1935 /*isVarArg=*/false); 1936 llvm::Type *TypeParams[] = { 1937 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1938 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1939 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1940 auto *FnTy = 1941 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1942 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 1943 break; 1944 } 1945 case OMPRTL__kmpc_end_reduce: { 1946 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 1947 // kmp_critical_name *lck); 1948 llvm::Type *TypeParams[] = { 1949 getIdentTyPointerTy(), CGM.Int32Ty, 1950 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1951 auto *FnTy = 1952 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1953 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 1954 break; 1955 } 1956 case OMPRTL__kmpc_end_reduce_nowait: { 1957 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 1958 // kmp_critical_name *lck); 1959 llvm::Type *TypeParams[] = { 1960 getIdentTyPointerTy(), CGM.Int32Ty, 1961 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1962 auto *FnTy = 1963 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1964 RTLFn = 1965 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 1966 break; 1967 } 1968 case OMPRTL__kmpc_omp_task_begin_if0: { 1969 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1970 // *new_task); 1971 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1972 CGM.VoidPtrTy}; 1973 auto *FnTy = 1974 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1975 RTLFn = 1976 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 1977 break; 1978 } 1979 case OMPRTL__kmpc_omp_task_complete_if0: { 1980 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1981 // *new_task); 1982 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1983 CGM.VoidPtrTy}; 1984 auto *FnTy = 1985 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1986 RTLFn = CGM.CreateRuntimeFunction(FnTy, 1987 /*Name=*/"__kmpc_omp_task_complete_if0"); 1988 break; 1989 } 1990 case OMPRTL__kmpc_ordered: { 1991 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 1992 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1993 auto *FnTy = 1994 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1995 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 1996 break; 1997 } 1998 case OMPRTL__kmpc_end_ordered: { 1999 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 2000 llvm::Type *TypeParams[] 
= {getIdentTyPointerTy(), CGM.Int32Ty}; 2001 auto *FnTy = 2002 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2003 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 2004 break; 2005 } 2006 case OMPRTL__kmpc_omp_taskwait: { 2007 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 2008 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2009 auto *FnTy = 2010 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2011 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 2012 break; 2013 } 2014 case OMPRTL__kmpc_taskgroup: { 2015 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 2016 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2017 auto *FnTy = 2018 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2019 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 2020 break; 2021 } 2022 case OMPRTL__kmpc_end_taskgroup: { 2023 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 2024 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2025 auto *FnTy = 2026 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2027 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 2028 break; 2029 } 2030 case OMPRTL__kmpc_push_proc_bind: { 2031 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 2032 // int proc_bind) 2033 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2034 auto *FnTy = 2035 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2036 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 2037 break; 2038 } 2039 case OMPRTL__kmpc_omp_task_with_deps: { 2040 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2041 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2042 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 2043 
llvm::Type *TypeParams[] = { 2044 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 2045 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 2046 auto *FnTy = 2047 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2048 RTLFn = 2049 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 2050 break; 2051 } 2052 case OMPRTL__kmpc_omp_wait_deps: { 2053 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2054 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2055 // kmp_depend_info_t *noalias_dep_list); 2056 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2057 CGM.Int32Ty, CGM.VoidPtrTy, 2058 CGM.Int32Ty, CGM.VoidPtrTy}; 2059 auto *FnTy = 2060 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2061 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2062 break; 2063 } 2064 case OMPRTL__kmpc_cancellationpoint: { 2065 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2066 // global_tid, kmp_int32 cncl_kind) 2067 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2068 auto *FnTy = 2069 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2070 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2071 break; 2072 } 2073 case OMPRTL__kmpc_cancel: { 2074 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2075 // kmp_int32 cncl_kind) 2076 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2077 auto *FnTy = 2078 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2079 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2080 break; 2081 } 2082 case OMPRTL__kmpc_push_num_teams: { 2083 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 2084 // kmp_int32 num_teams, kmp_int32 num_threads) 2085 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2086 CGM.Int32Ty}; 2087 
auto *FnTy = 2088 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2089 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2090 break; 2091 } 2092 case OMPRTL__kmpc_fork_teams: { 2093 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2094 // microtask, ...); 2095 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2096 getKmpc_MicroPointerTy()}; 2097 auto *FnTy = 2098 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2099 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2100 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 2101 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 2102 llvm::LLVMContext &Ctx = F->getContext(); 2103 llvm::MDBuilder MDB(Ctx); 2104 // Annotate the callback behavior of the __kmpc_fork_teams: 2105 // - The callback callee is argument number 2 (microtask). 2106 // - The first two arguments of the callback callee are unknown (-1). 2107 // - All variadic arguments to the __kmpc_fork_teams are passed to the 2108 // callback callee. 
2109 F->addMetadata( 2110 llvm::LLVMContext::MD_callback, 2111 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 2112 2, {-1, -1}, 2113 /* VarArgsArePassed */ true)})); 2114 } 2115 } 2116 break; 2117 } 2118 case OMPRTL__kmpc_taskloop: { 2119 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 2120 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 2121 // sched, kmp_uint64 grainsize, void *task_dup); 2122 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2123 CGM.IntTy, 2124 CGM.VoidPtrTy, 2125 CGM.IntTy, 2126 CGM.Int64Ty->getPointerTo(), 2127 CGM.Int64Ty->getPointerTo(), 2128 CGM.Int64Ty, 2129 CGM.IntTy, 2130 CGM.IntTy, 2131 CGM.Int64Ty, 2132 CGM.VoidPtrTy}; 2133 auto *FnTy = 2134 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2135 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 2136 break; 2137 } 2138 case OMPRTL__kmpc_doacross_init: { 2139 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 2140 // num_dims, struct kmp_dim *dims); 2141 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2142 CGM.Int32Ty, 2143 CGM.Int32Ty, 2144 CGM.VoidPtrTy}; 2145 auto *FnTy = 2146 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2147 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2148 break; 2149 } 2150 case OMPRTL__kmpc_doacross_fini: { 2151 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2152 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2153 auto *FnTy = 2154 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2155 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2156 break; 2157 } 2158 case OMPRTL__kmpc_doacross_post: { 2159 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2160 // *vec); 2161 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2162 CGM.Int64Ty->getPointerTo()}; 2163 auto *FnTy = 
2164 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2165 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2166 break; 2167 } 2168 case OMPRTL__kmpc_doacross_wait: { 2169 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2170 // *vec); 2171 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2172 CGM.Int64Ty->getPointerTo()}; 2173 auto *FnTy = 2174 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2175 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2176 break; 2177 } 2178 case OMPRTL__kmpc_task_reduction_init: { 2179 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void 2180 // *data); 2181 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2182 auto *FnTy = 2183 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2184 RTLFn = 2185 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); 2186 break; 2187 } 2188 case OMPRTL__kmpc_task_reduction_get_th_data: { 2189 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2190 // *d); 2191 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2192 auto *FnTy = 2193 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2194 RTLFn = CGM.CreateRuntimeFunction( 2195 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2196 break; 2197 } 2198 case OMPRTL__kmpc_push_target_tripcount: { 2199 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 2200 // size); 2201 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; 2202 llvm::FunctionType *FnTy = 2203 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2204 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); 2205 break; 2206 } 2207 case OMPRTL__tgt_target: { 2208 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2209 // arg_num, void** args_base, void 
**args, size_t *arg_sizes, int64_t 2210 // *arg_types); 2211 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2212 CGM.VoidPtrTy, 2213 CGM.Int32Ty, 2214 CGM.VoidPtrPtrTy, 2215 CGM.VoidPtrPtrTy, 2216 CGM.SizeTy->getPointerTo(), 2217 CGM.Int64Ty->getPointerTo()}; 2218 auto *FnTy = 2219 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2220 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2221 break; 2222 } 2223 case OMPRTL__tgt_target_nowait: { 2224 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2225 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 2226 // int64_t *arg_types); 2227 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2228 CGM.VoidPtrTy, 2229 CGM.Int32Ty, 2230 CGM.VoidPtrPtrTy, 2231 CGM.VoidPtrPtrTy, 2232 CGM.SizeTy->getPointerTo(), 2233 CGM.Int64Ty->getPointerTo()}; 2234 auto *FnTy = 2235 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2236 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2237 break; 2238 } 2239 case OMPRTL__tgt_target_teams: { 2240 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2241 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 2242 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2243 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2244 CGM.VoidPtrTy, 2245 CGM.Int32Ty, 2246 CGM.VoidPtrPtrTy, 2247 CGM.VoidPtrPtrTy, 2248 CGM.SizeTy->getPointerTo(), 2249 CGM.Int64Ty->getPointerTo(), 2250 CGM.Int32Ty, 2251 CGM.Int32Ty}; 2252 auto *FnTy = 2253 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2254 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2255 break; 2256 } 2257 case OMPRTL__tgt_target_teams_nowait: { 2258 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2259 // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t 2260 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2261 llvm::Type 
*TypeParams[] = {CGM.Int64Ty, 2262 CGM.VoidPtrTy, 2263 CGM.Int32Ty, 2264 CGM.VoidPtrPtrTy, 2265 CGM.VoidPtrPtrTy, 2266 CGM.SizeTy->getPointerTo(), 2267 CGM.Int64Ty->getPointerTo(), 2268 CGM.Int32Ty, 2269 CGM.Int32Ty}; 2270 auto *FnTy = 2271 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2272 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2273 break; 2274 } 2275 case OMPRTL__tgt_register_lib: { 2276 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 2277 QualType ParamTy = 2278 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2279 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2280 auto *FnTy = 2281 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2282 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 2283 break; 2284 } 2285 case OMPRTL__tgt_unregister_lib: { 2286 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 2287 QualType ParamTy = 2288 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2289 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2290 auto *FnTy = 2291 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2292 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 2293 break; 2294 } 2295 case OMPRTL__tgt_target_data_begin: { 2296 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2297 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2298 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2299 CGM.Int32Ty, 2300 CGM.VoidPtrPtrTy, 2301 CGM.VoidPtrPtrTy, 2302 CGM.SizeTy->getPointerTo(), 2303 CGM.Int64Ty->getPointerTo()}; 2304 auto *FnTy = 2305 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2306 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2307 break; 2308 } 2309 case OMPRTL__tgt_target_data_begin_nowait: { 2310 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2311 
// arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2312 // *arg_types); 2313 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2314 CGM.Int32Ty, 2315 CGM.VoidPtrPtrTy, 2316 CGM.VoidPtrPtrTy, 2317 CGM.SizeTy->getPointerTo(), 2318 CGM.Int64Ty->getPointerTo()}; 2319 auto *FnTy = 2320 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2321 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2322 break; 2323 } 2324 case OMPRTL__tgt_target_data_end: { 2325 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2326 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2327 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2328 CGM.Int32Ty, 2329 CGM.VoidPtrPtrTy, 2330 CGM.VoidPtrPtrTy, 2331 CGM.SizeTy->getPointerTo(), 2332 CGM.Int64Ty->getPointerTo()}; 2333 auto *FnTy = 2334 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2335 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2336 break; 2337 } 2338 case OMPRTL__tgt_target_data_end_nowait: { 2339 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2340 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2341 // *arg_types); 2342 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2343 CGM.Int32Ty, 2344 CGM.VoidPtrPtrTy, 2345 CGM.VoidPtrPtrTy, 2346 CGM.SizeTy->getPointerTo(), 2347 CGM.Int64Ty->getPointerTo()}; 2348 auto *FnTy = 2349 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2350 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2351 break; 2352 } 2353 case OMPRTL__tgt_target_data_update: { 2354 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2355 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 2356 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2357 CGM.Int32Ty, 2358 CGM.VoidPtrPtrTy, 2359 CGM.VoidPtrPtrTy, 2360 CGM.SizeTy->getPointerTo(), 2361 CGM.Int64Ty->getPointerTo()}; 2362 auto *FnTy 
= 2363 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2364 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2365 break; 2366 } 2367 case OMPRTL__tgt_target_data_update_nowait: { 2368 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2369 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t 2370 // *arg_types); 2371 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2372 CGM.Int32Ty, 2373 CGM.VoidPtrPtrTy, 2374 CGM.VoidPtrPtrTy, 2375 CGM.SizeTy->getPointerTo(), 2376 CGM.Int64Ty->getPointerTo()}; 2377 auto *FnTy = 2378 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2379 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2380 break; 2381 } 2382 } 2383 assert(RTLFn && "Unable to find OpenMP runtime function"); 2384 return RTLFn; 2385 } 2386 2387 llvm::FunctionCallee 2388 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 2389 assert((IVSize == 32 || IVSize == 64) && 2390 "IV size is not compatible with the omp runtime"); 2391 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2392 : "__kmpc_for_static_init_4u") 2393 : (IVSigned ? "__kmpc_for_static_init_8" 2394 : "__kmpc_for_static_init_8u"); 2395 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2396 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2397 llvm::Type *TypeParams[] = { 2398 getIdentTyPointerTy(), // loc 2399 CGM.Int32Ty, // tid 2400 CGM.Int32Ty, // schedtype 2401 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2402 PtrTy, // p_lower 2403 PtrTy, // p_upper 2404 PtrTy, // p_stride 2405 ITy, // incr 2406 ITy // chunk 2407 }; 2408 auto *FnTy = 2409 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2410 return CGM.CreateRuntimeFunction(FnTy, Name); 2411 } 2412 2413 llvm::FunctionCallee 2414 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 2415 assert((IVSize == 32 || IVSize == 64) && 2416 "IV size is not compatible with the omp runtime"); 2417 StringRef Name = 2418 IVSize == 32 2419 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2420 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2421 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2422 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2423 CGM.Int32Ty, // tid 2424 CGM.Int32Ty, // schedtype 2425 ITy, // lower 2426 ITy, // upper 2427 ITy, // stride 2428 ITy // chunk 2429 }; 2430 auto *FnTy = 2431 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2432 return CGM.CreateRuntimeFunction(FnTy, Name); 2433 } 2434 2435 llvm::FunctionCallee 2436 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 2437 assert((IVSize == 32 || IVSize == 64) && 2438 "IV size is not compatible with the omp runtime"); 2439 StringRef Name = 2440 IVSize == 32 2441 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2442 : (IVSigned ? 
"__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2443 llvm::Type *TypeParams[] = { 2444 getIdentTyPointerTy(), // loc 2445 CGM.Int32Ty, // tid 2446 }; 2447 auto *FnTy = 2448 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2449 return CGM.CreateRuntimeFunction(FnTy, Name); 2450 } 2451 2452 llvm::FunctionCallee 2453 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 2454 assert((IVSize == 32 || IVSize == 64) && 2455 "IV size is not compatible with the omp runtime"); 2456 StringRef Name = 2457 IVSize == 32 2458 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2459 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2460 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2461 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2462 llvm::Type *TypeParams[] = { 2463 getIdentTyPointerTy(), // loc 2464 CGM.Int32Ty, // tid 2465 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2466 PtrTy, // p_lower 2467 PtrTy, // p_upper 2468 PtrTy // p_stride 2469 }; 2470 auto *FnTy = 2471 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2472 return CGM.CreateRuntimeFunction(FnTy, Name); 2473 } 2474 2475 Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) { 2476 if (CGM.getLangOpts().OpenMPSimd) 2477 return Address::invalid(); 2478 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2479 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2480 if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) { 2481 SmallString<64> PtrName; 2482 { 2483 llvm::raw_svector_ostream OS(PtrName); 2484 OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr"; 2485 } 2486 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2487 if (!Ptr) { 2488 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2489 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2490 PtrName); 2491 if 
(!CGM.getLangOpts().OpenMPIsDevice) { 2492 auto *GV = cast<llvm::GlobalVariable>(Ptr); 2493 GV->setLinkage(llvm::GlobalValue::ExternalLinkage); 2494 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 2495 } 2496 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr)); 2497 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 2498 } 2499 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 2500 } 2501 return Address::invalid(); 2502 } 2503 2504 llvm::Constant * 2505 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2506 assert(!CGM.getLangOpts().OpenMPUseTLS || 2507 !CGM.getContext().getTargetInfo().isTLSSupported()); 2508 // Lookup the entry, lazily creating it if necessary. 2509 std::string Suffix = getName({"cache", ""}); 2510 return getOrCreateInternalVariable( 2511 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 2512 } 2513 2514 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2515 const VarDecl *VD, 2516 Address VDAddr, 2517 SourceLocation Loc) { 2518 if (CGM.getLangOpts().OpenMPUseTLS && 2519 CGM.getContext().getTargetInfo().isTLSSupported()) 2520 return VDAddr; 2521 2522 llvm::Type *VarTy = VDAddr.getElementType(); 2523 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2524 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2525 CGM.Int8PtrTy), 2526 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2527 getOrCreateThreadPrivateCache(VD)}; 2528 return Address(CGF.EmitRuntimeCall( 2529 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2530 VDAddr.getAlignment()); 2531 } 2532 2533 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2534 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2535 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2536 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2537 // library. 
2538 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 2539 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2540 OMPLoc); 2541 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2542 // to register constructor/destructor for variable. 2543 llvm::Value *Args[] = { 2544 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 2545 Ctor, CopyCtor, Dtor}; 2546 CGF.EmitRuntimeCall( 2547 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 2548 } 2549 2550 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 2551 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 2552 bool PerformInit, CodeGenFunction *CGF) { 2553 if (CGM.getLangOpts().OpenMPUseTLS && 2554 CGM.getContext().getTargetInfo().isTLSSupported()) 2555 return nullptr; 2556 2557 VD = VD->getDefinition(CGM.getContext()); 2558 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 2559 QualType ASTTy = VD->getType(); 2560 2561 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 2562 const Expr *Init = VD->getAnyInitializer(); 2563 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2564 // Generate function that re-emits the declaration's initializer into the 2565 // threadprivate copy of the variable VD 2566 CodeGenFunction CtorCGF(CGM); 2567 FunctionArgList Args; 2568 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2569 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2570 ImplicitParamDecl::Other); 2571 Args.push_back(&Dst); 2572 2573 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2574 CGM.getContext().VoidPtrTy, Args); 2575 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2576 std::string Name = getName({"__kmpc_global_ctor_", ""}); 2577 llvm::Function *Fn = 2578 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2579 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 2580 Args, Loc, Loc); 2581 
llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 2582 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2583 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2584 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 2585 Arg = CtorCGF.Builder.CreateElementBitCast( 2586 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 2587 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 2588 /*IsInitializer=*/true); 2589 ArgVal = CtorCGF.EmitLoadOfScalar( 2590 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2591 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2592 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 2593 CtorCGF.FinishFunction(); 2594 Ctor = Fn; 2595 } 2596 if (VD->getType().isDestructedType() != QualType::DK_none) { 2597 // Generate function that emits destructor call for the threadprivate copy 2598 // of the variable VD 2599 CodeGenFunction DtorCGF(CGM); 2600 FunctionArgList Args; 2601 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2602 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2603 ImplicitParamDecl::Other); 2604 Args.push_back(&Dst); 2605 2606 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2607 CGM.getContext().VoidTy, Args); 2608 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2609 std::string Name = getName({"__kmpc_global_dtor_", ""}); 2610 llvm::Function *Fn = 2611 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2612 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2613 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 2614 Loc, Loc); 2615 // Create a scope with an artificial location for the body of this function. 
2616 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2617 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 2618 DtorCGF.GetAddrOfLocalVar(&Dst), 2619 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 2620 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 2621 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2622 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2623 DtorCGF.FinishFunction(); 2624 Dtor = Fn; 2625 } 2626 // Do not emit init function if it is not required. 2627 if (!Ctor && !Dtor) 2628 return nullptr; 2629 2630 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2631 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 2632 /*isVarArg=*/false) 2633 ->getPointerTo(); 2634 // Copying constructor for the threadprivate variable. 2635 // Must be NULL - reserved by runtime, but currently it requires that this 2636 // parameter is always NULL. Otherwise it fires assertion. 2637 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 2638 if (Ctor == nullptr) { 2639 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 2640 /*isVarArg=*/false) 2641 ->getPointerTo(); 2642 Ctor = llvm::Constant::getNullValue(CtorTy); 2643 } 2644 if (Dtor == nullptr) { 2645 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 2646 /*isVarArg=*/false) 2647 ->getPointerTo(); 2648 Dtor = llvm::Constant::getNullValue(DtorTy); 2649 } 2650 if (!CGF) { 2651 auto *InitFunctionTy = 2652 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 2653 std::string Name = getName({"__omp_threadprivate_init_", ""}); 2654 llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction( 2655 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 2656 CodeGenFunction InitCGF(CGM); 2657 FunctionArgList ArgList; 2658 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 2659 CGM.getTypes().arrangeNullaryFunction(), ArgList, 2660 Loc, Loc); 2661 
emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2662 InitCGF.FinishFunction(); 2663 return InitFunction; 2664 } 2665 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2666 } 2667 return nullptr; 2668 } 2669 2670 /// Obtain information that uniquely identifies a target entry. This 2671 /// consists of the file and device IDs as well as line number associated with 2672 /// the relevant entry source location. 2673 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 2674 unsigned &DeviceID, unsigned &FileID, 2675 unsigned &LineNum) { 2676 SourceManager &SM = C.getSourceManager(); 2677 2678 // The loc should be always valid and have a file ID (the user cannot use 2679 // #pragma directives in macros) 2680 2681 assert(Loc.isValid() && "Source location is expected to be always valid."); 2682 2683 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 2684 assert(PLoc.isValid() && "Source location is expected to be always valid."); 2685 2686 llvm::sys::fs::UniqueID ID; 2687 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 2688 SM.getDiagnostics().Report(diag::err_cannot_open_file) 2689 << PLoc.getFilename() << EC.message(); 2690 2691 DeviceID = ID.getDevice(); 2692 FileID = ID.getFile(); 2693 LineNum = PLoc.getLine(); 2694 } 2695 2696 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 2697 llvm::GlobalVariable *Addr, 2698 bool PerformInit) { 2699 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2700 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2701 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) 2702 return CGM.getLangOpts().OpenMPIsDevice; 2703 VD = VD->getDefinition(CGM.getContext()); 2704 if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 2705 return CGM.getLangOpts().OpenMPIsDevice; 2706 2707 QualType ASTTy = VD->getType(); 2708 2709 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 2710 // Produce the unique prefix to 
identify the new target regions. We use 2711 // the source location of the variable declaration which we know to not 2712 // conflict with any target region. 2713 unsigned DeviceID; 2714 unsigned FileID; 2715 unsigned Line; 2716 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 2717 SmallString<128> Buffer, Out; 2718 { 2719 llvm::raw_svector_ostream OS(Buffer); 2720 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 2721 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 2722 } 2723 2724 const Expr *Init = VD->getAnyInitializer(); 2725 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2726 llvm::Constant *Ctor; 2727 llvm::Constant *ID; 2728 if (CGM.getLangOpts().OpenMPIsDevice) { 2729 // Generate function that re-emits the declaration's initializer into 2730 // the threadprivate copy of the variable VD 2731 CodeGenFunction CtorCGF(CGM); 2732 2733 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2734 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2735 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2736 FTy, Twine(Buffer, "_ctor"), FI, Loc); 2737 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 2738 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2739 FunctionArgList(), Loc, Loc); 2740 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 2741 CtorCGF.EmitAnyExprToMem(Init, 2742 Address(Addr, CGM.getContext().getDeclAlign(VD)), 2743 Init->getType().getQualifiers(), 2744 /*IsInitializer=*/true); 2745 CtorCGF.FinishFunction(); 2746 Ctor = Fn; 2747 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2748 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 2749 } else { 2750 Ctor = new llvm::GlobalVariable( 2751 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2752 llvm::GlobalValue::PrivateLinkage, 2753 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 2754 ID = Ctor; 2755 } 2756 2757 // Register the information for the entry 
associated with the constructor. 2758 Out.clear(); 2759 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2760 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 2761 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 2762 } 2763 if (VD->getType().isDestructedType() != QualType::DK_none) { 2764 llvm::Constant *Dtor; 2765 llvm::Constant *ID; 2766 if (CGM.getLangOpts().OpenMPIsDevice) { 2767 // Generate function that emits destructor call for the threadprivate 2768 // copy of the variable VD 2769 CodeGenFunction DtorCGF(CGM); 2770 2771 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2772 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2773 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2774 FTy, Twine(Buffer, "_dtor"), FI, Loc); 2775 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2776 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2777 FunctionArgList(), Loc, Loc); 2778 // Create a scope with an artificial location for the body of this 2779 // function. 2780 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2781 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 2782 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2783 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2784 DtorCGF.FinishFunction(); 2785 Dtor = Fn; 2786 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2787 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 2788 } else { 2789 Dtor = new llvm::GlobalVariable( 2790 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2791 llvm::GlobalValue::PrivateLinkage, 2792 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 2793 ID = Dtor; 2794 } 2795 // Register the information for the entry associated with the destructor. 
2796 Out.clear(); 2797 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2798 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2799 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2800 } 2801 return CGM.getLangOpts().OpenMPIsDevice; 2802 } 2803 2804 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2805 QualType VarType, 2806 StringRef Name) { 2807 std::string Suffix = getName({"artificial", ""}); 2808 std::string CacheSuffix = getName({"cache", ""}); 2809 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2810 llvm::Value *GAddr = 2811 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2812 llvm::Value *Args[] = { 2813 emitUpdateLocation(CGF, SourceLocation()), 2814 getThreadID(CGF, SourceLocation()), 2815 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2816 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2817 /*IsSigned=*/false), 2818 getOrCreateInternalVariable( 2819 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2820 return Address( 2821 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2822 CGF.EmitRuntimeCall( 2823 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2824 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2825 CGM.getPointerAlign()); 2826 } 2827 2828 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 2829 const RegionCodeGenTy &ThenGen, 2830 const RegionCodeGenTy &ElseGen) { 2831 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2832 2833 // If the condition constant folds and can be elided, try to avoid emitting 2834 // the condition and the dead arm of the if/else. 2835 bool CondConstant; 2836 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2837 if (CondConstant) 2838 ThenGen(CGF); 2839 else 2840 ElseGen(CGF); 2841 return; 2842 } 2843 2844 // Otherwise, the condition did not fold, or we couldn't elide it. 
/// Emit the runtime call sequence for an OpenMP 'parallel' region.
///
/// Without an 'if' clause a single __kmpc_fork_call that hands OutlinedFn and
/// CapturedVars to the runtime is emitted. With an 'if' clause that fork call
/// becomes the 'then' arm, and a direct call of OutlinedFn bracketed by
/// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel becomes the
/// 'else' arm.
///
/// \param CGF Function being emitted; nothing is done without an insert point.
/// \param Loc Source location used to build the ident_t arguments.
/// \param OutlinedFn Outlined function that holds the region body.
/// \param CapturedVars Values forwarded to OutlinedFn as trailing arguments.
/// \param IfCond Condition of the 'if' clause, or null when there is none.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  // The location descriptor is emitted once, here, and captured by both arms.
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    // Fixed leading arguments first, then every captured variable.
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&zero, &zero, CapturedVars...);
    // Both leading pointer arguments refer to the same zero-initialized
    // temporary.
    Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                                        /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    // The location is re-emitted here instead of reusing RTLoc.
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No 'if' clause: only the fork path is needed.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2931 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2932 SourceLocation Loc) { 2933 if (auto *OMPRegionInfo = 2934 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2935 if (OMPRegionInfo->getThreadIDVariable()) 2936 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 2937 2938 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2939 QualType Int32Ty = 2940 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2941 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2942 CGF.EmitStoreOfScalar(ThreadID, 2943 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2944 2945 return ThreadIDTemp; 2946 } 2947 2948 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 2949 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2950 SmallString<256> Buffer; 2951 llvm::raw_svector_ostream Out(Buffer); 2952 Out << Name; 2953 StringRef RuntimeName = Out.str(); 2954 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2955 if (Elem.second) { 2956 assert(Elem.second->getType()->getPointerElementType() == Ty && 2957 "OMP internal variable has different type than requested"); 2958 return &*Elem.second; 2959 } 2960 2961 return Elem.second = new llvm::GlobalVariable( 2962 CGM.getModule(), Ty, /*IsConstant*/ false, 2963 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2964 Elem.first(), /*InsertBefore=*/nullptr, 2965 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2966 } 2967 2968 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2969 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2970 std::string Name = getName({Prefix, "var"}); 2971 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2972 } 2973 2974 namespace { 2975 /// Common pre(post)-action for different OpenMP constructs. 
2976 class CommonActionTy final : public PrePostActionTy { 2977 llvm::FunctionCallee EnterCallee; 2978 ArrayRef<llvm::Value *> EnterArgs; 2979 llvm::FunctionCallee ExitCallee; 2980 ArrayRef<llvm::Value *> ExitArgs; 2981 bool Conditional; 2982 llvm::BasicBlock *ContBlock = nullptr; 2983 2984 public: 2985 CommonActionTy(llvm::FunctionCallee EnterCallee, 2986 ArrayRef<llvm::Value *> EnterArgs, 2987 llvm::FunctionCallee ExitCallee, 2988 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2989 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2990 ExitArgs(ExitArgs), Conditional(Conditional) {} 2991 void Enter(CodeGenFunction &CGF) override { 2992 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2993 if (Conditional) { 2994 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2995 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2996 ContBlock = CGF.createBasicBlock("omp_if.end"); 2997 // Generate the branch (If-stmt) 2998 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2999 CGF.EmitBlock(ThenBlock); 3000 } 3001 } 3002 void Done(CodeGenFunction &CGF) { 3003 // Emit the rest of blocks/branches 3004 CGF.EmitBranch(ContBlock); 3005 CGF.EmitBlock(ContBlock, true); 3006 } 3007 void Exit(CodeGenFunction &CGF) override { 3008 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 3009 } 3010 }; 3011 } // anonymous namespace 3012 3013 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 3014 StringRef CriticalName, 3015 const RegionCodeGenTy &CriticalOpGen, 3016 SourceLocation Loc, const Expr *Hint) { 3017 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 3018 // CriticalOpGen(); 3019 // __kmpc_end_critical(ident_t *, gtid, Lock); 3020 // Prepare arguments and build a call to __kmpc_critical 3021 if (!CGF.HaveInsertPoint()) 3022 return; 3023 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3024 getCriticalRegionLock(CriticalName)}; 3025 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 3026 std::end(Args)); 3027 if (Hint) { 3028 EnterArgs.push_back(CGF.Builder.CreateIntCast( 3029 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); 3030 } 3031 CommonActionTy Action( 3032 createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint 3033 : OMPRTL__kmpc_critical), 3034 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); 3035 CriticalOpGen.setAction(Action); 3036 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 3037 } 3038 3039 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 3040 const RegionCodeGenTy &MasterOpGen, 3041 SourceLocation Loc) { 3042 if (!CGF.HaveInsertPoint()) 3043 return; 3044 // if(__kmpc_master(ident_t *, gtid)) { 3045 // MasterOpGen(); 3046 // __kmpc_end_master(ident_t *, gtid); 3047 // } 3048 // Prepare arguments and build a call to __kmpc_master 3049 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3050 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, 3051 createRuntimeFunction(OMPRTL__kmpc_end_master), Args, 3052 /*Conditional=*/true); 3053 MasterOpGen.setAction(Action); 3054 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 3055 Action.Done(CGF); 3056 } 3057 3058 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 3059 SourceLocation Loc) { 3060 if (!CGF.HaveInsertPoint()) 3061 return; 3062 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 3063 llvm::Value *Args[] = { 3064 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3065 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 3066 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 3067 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3068 Region->emitUntiedSwitch(CGF); 3069 } 3070 3071 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 3072 const RegionCodeGenTy &TaskgroupOpGen, 3073 SourceLocation Loc) { 3074 if 
(!CGF.HaveInsertPoint()) 3075 return; 3076 // __kmpc_taskgroup(ident_t *, gtid); 3077 // TaskgroupOpGen(); 3078 // __kmpc_end_taskgroup(ident_t *, gtid); 3079 // Prepare arguments and build a call to __kmpc_taskgroup 3080 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3081 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, 3082 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 3083 Args); 3084 TaskgroupOpGen.setAction(Action); 3085 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 3086 } 3087 3088 /// Given an array of pointers to variables, project the address of a 3089 /// given variable. 3090 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 3091 unsigned Index, const VarDecl *Var) { 3092 // Pull out the pointer to the variable. 3093 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 3094 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 3095 3096 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 3097 Addr = CGF.Builder.CreateElementBitCast( 3098 Addr, CGF.ConvertTypeForMem(Var->getType())); 3099 return Addr; 3100 } 3101 3102 static llvm::Value *emitCopyprivateCopyFunction( 3103 CodeGenModule &CGM, llvm::Type *ArgsType, 3104 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 3105 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 3106 SourceLocation Loc) { 3107 ASTContext &C = CGM.getContext(); 3108 // void copy_func(void *LHSArg, void *RHSArg); 3109 FunctionArgList Args; 3110 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3111 ImplicitParamDecl::Other); 3112 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3113 ImplicitParamDecl::Other); 3114 Args.push_back(&LHSArg); 3115 Args.push_back(&RHSArg); 3116 const auto &CGFI = 3117 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3118 std::string Name = 3119 
/// Emit a 'single' region, including the copyprivate broadcast if any.
///
/// The body is guarded by __kmpc_single / __kmpc_end_single. When
/// \p CopyprivateVars is non-empty, a 'did_it' flag records whether this
/// thread executed the body, and __kmpc_copyprivate is called afterwards with
/// a list of the copyprivate variable addresses and a generated copy function.
///
/// \param CGF Function being emitted; nothing is done without an insert point.
/// \param SingleOpGen Generator for the body of the region.
/// \param Loc Source location used for the ident_t arguments.
/// \param CopyprivateVars Variables listed in the copyprivate clause.
/// \param SrcExprs, DstExprs, AssignmentOps Per-variable helper expressions;
///        all four lists must have the same length.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid when there is nothing to broadcast; its validity is
  // the switch for all copyprivate-related code below.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Emitted before Action.Done(), i.e. still inside the guarded block.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional opened by the __kmpc_single call.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    // Store the address of each copyprivate variable, cast to void *.
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): emitCopyprivateCopyFunction names its expression
    // parameters (DestExprs, SrcExprs), so SrcExprs is passed in the DestExprs
    // position and DstExprs in the SrcExprs position - confirm this is the
    // intended pairing.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}
OrderedOpGen.setAction(Action); 3252 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3253 return; 3254 } 3255 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3256 } 3257 3258 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 3259 unsigned Flags; 3260 if (Kind == OMPD_for) 3261 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 3262 else if (Kind == OMPD_sections) 3263 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 3264 else if (Kind == OMPD_single) 3265 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 3266 else if (Kind == OMPD_barrier) 3267 Flags = OMP_IDENT_BARRIER_EXPL; 3268 else 3269 Flags = OMP_IDENT_BARRIER_IMPL; 3270 return Flags; 3271 } 3272 3273 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 3274 OpenMPDirectiveKind Kind, bool EmitChecks, 3275 bool ForceSimpleCall) { 3276 if (!CGF.HaveInsertPoint()) 3277 return; 3278 // Build call __kmpc_cancel_barrier(loc, thread_id); 3279 // Build call __kmpc_barrier(loc, thread_id); 3280 unsigned Flags = getDefaultFlagsForBarriers(Kind); 3281 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 3282 // thread_id); 3283 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 3284 getThreadID(CGF, Loc)}; 3285 if (auto *OMPRegionInfo = 3286 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 3287 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 3288 llvm::Value *Result = CGF.EmitRuntimeCall( 3289 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 3290 if (EmitChecks) { 3291 // if (__kmpc_cancel_barrier()) { 3292 // exit from construct; 3293 // } 3294 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 3295 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 3296 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 3297 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 3298 CGF.EmitBlock(ExitBB); 3299 // exit from construct; 3300 CodeGenFunction::JumpDest CancelDestination = 3301 
CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 3302 CGF.EmitBranchThroughCleanup(CancelDestination); 3303 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 3304 } 3305 return; 3306 } 3307 } 3308 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 3309 } 3310 3311 /// Map the OpenMP loop schedule to the runtime enumeration. 3312 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 3313 bool Chunked, bool Ordered) { 3314 switch (ScheduleKind) { 3315 case OMPC_SCHEDULE_static: 3316 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 3317 : (Ordered ? OMP_ord_static : OMP_sch_static); 3318 case OMPC_SCHEDULE_dynamic: 3319 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 3320 case OMPC_SCHEDULE_guided: 3321 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 3322 case OMPC_SCHEDULE_runtime: 3323 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 3324 case OMPC_SCHEDULE_auto: 3325 return Ordered ? OMP_ord_auto : OMP_sch_auto; 3326 case OMPC_SCHEDULE_unknown: 3327 assert(!Chunked && "chunk was specified but schedule kind not known"); 3328 return Ordered ? OMP_ord_static : OMP_sch_static; 3329 } 3330 llvm_unreachable("Unexpected runtime schedule"); 3331 } 3332 3333 /// Map the OpenMP distribute schedule to the runtime enumeration. 3334 static OpenMPSchedType 3335 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 3336 // only static is allowed for dist_schedule 3337 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 3338 } 3339 3340 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 3341 bool Chunked) const { 3342 OpenMPSchedType Schedule = 3343 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3344 return Schedule == OMP_sch_static; 3345 } 3346 3347 bool CGOpenMPRuntime::isStaticNonchunked( 3348 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3349 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3350 return Schedule == OMP_dist_sch_static; 3351 } 3352 3353 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 3354 bool Chunked) const { 3355 OpenMPSchedType Schedule = 3356 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3357 return Schedule == OMP_sch_static_chunked; 3358 } 3359 3360 bool CGOpenMPRuntime::isStaticChunked( 3361 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3362 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3363 return Schedule == OMP_dist_sch_static_chunked; 3364 } 3365 3366 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 3367 OpenMPSchedType Schedule = 3368 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 3369 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 3370 return Schedule != OMP_sch_static; 3371 } 3372 3373 static int addMonoNonMonoModifier(OpenMPSchedType Schedule, 3374 OpenMPScheduleClauseModifier M1, 3375 OpenMPScheduleClauseModifier M2) { 3376 int Modifier = 0; 3377 switch (M1) { 3378 case OMPC_SCHEDULE_MODIFIER_monotonic: 3379 Modifier = OMP_sch_modifier_monotonic; 3380 break; 3381 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3382 Modifier = OMP_sch_modifier_nonmonotonic; 3383 break; 3384 case OMPC_SCHEDULE_MODIFIER_simd: 3385 if (Schedule == OMP_sch_static_chunked) 3386 Schedule = OMP_sch_static_balanced_chunked; 3387 break; 3388 case OMPC_SCHEDULE_MODIFIER_last: 3389 
case OMPC_SCHEDULE_MODIFIER_unknown: 3390 break; 3391 } 3392 switch (M2) { 3393 case OMPC_SCHEDULE_MODIFIER_monotonic: 3394 Modifier = OMP_sch_modifier_monotonic; 3395 break; 3396 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3397 Modifier = OMP_sch_modifier_nonmonotonic; 3398 break; 3399 case OMPC_SCHEDULE_MODIFIER_simd: 3400 if (Schedule == OMP_sch_static_chunked) 3401 Schedule = OMP_sch_static_balanced_chunked; 3402 break; 3403 case OMPC_SCHEDULE_MODIFIER_last: 3404 case OMPC_SCHEDULE_MODIFIER_unknown: 3405 break; 3406 } 3407 return Schedule | Modifier; 3408 } 3409 3410 void CGOpenMPRuntime::emitForDispatchInit( 3411 CodeGenFunction &CGF, SourceLocation Loc, 3412 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 3413 bool Ordered, const DispatchRTInput &DispatchValues) { 3414 if (!CGF.HaveInsertPoint()) 3415 return; 3416 OpenMPSchedType Schedule = getRuntimeSchedule( 3417 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 3418 assert(Ordered || 3419 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 3420 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 3421 Schedule != OMP_sch_static_balanced_chunked)); 3422 // Call __kmpc_dispatch_init( 3423 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 3424 // kmp_int[32|64] lower, kmp_int[32|64] upper, 3425 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 3426 3427 // If the Chunk was not specified in the clause - use default value 1. 3428 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 3429 : CGF.Builder.getIntN(IVSize, 1); 3430 llvm::Value *Args[] = { 3431 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3432 CGF.Builder.getInt32(addMonoNonMonoModifier( 3433 Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 3434 DispatchValues.LB, // Lower 3435 DispatchValues.UB, // Upper 3436 CGF.Builder.getIntN(IVSize, 1), // Stride 3437 Chunk // Chunk 3438 }; 3439 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 3440 } 3441 3442 static void emitForStaticInitCall( 3443 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 3444 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 3445 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 3446 const CGOpenMPRuntime::StaticRTInput &Values) { 3447 if (!CGF.HaveInsertPoint()) 3448 return; 3449 3450 assert(!Values.Ordered); 3451 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 3452 Schedule == OMP_sch_static_balanced_chunked || 3453 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 3454 Schedule == OMP_dist_sch_static || 3455 Schedule == OMP_dist_sch_static_chunked); 3456 3457 // Call __kmpc_for_static_init( 3458 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 3459 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 3460 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 3461 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 3462 llvm::Value *Chunk = Values.Chunk; 3463 if (Chunk == nullptr) { 3464 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 3465 Schedule == OMP_dist_sch_static) && 3466 "expected static non-chunked schedule"); 3467 // If the Chunk was not specified in the clause - use default value 1. 
3468 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 3469 } else { 3470 assert((Schedule == OMP_sch_static_chunked || 3471 Schedule == OMP_sch_static_balanced_chunked || 3472 Schedule == OMP_ord_static_chunked || 3473 Schedule == OMP_dist_sch_static_chunked) && 3474 "expected static chunked schedule"); 3475 } 3476 llvm::Value *Args[] = { 3477 UpdateLocation, 3478 ThreadId, 3479 CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1, 3480 M2)), // Schedule type 3481 Values.IL.getPointer(), // &isLastIter 3482 Values.LB.getPointer(), // &LB 3483 Values.UB.getPointer(), // &UB 3484 Values.ST.getPointer(), // &Stride 3485 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 3486 Chunk // Chunk 3487 }; 3488 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 3489 } 3490 3491 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 3492 SourceLocation Loc, 3493 OpenMPDirectiveKind DKind, 3494 const OpenMPScheduleTy &ScheduleKind, 3495 const StaticRTInput &Values) { 3496 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 3497 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 3498 assert(isOpenMPWorksharingDirective(DKind) && 3499 "Expected loop-based or sections-based directive."); 3500 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 3501 isOpenMPLoopDirective(DKind) 3502 ? 
OMP_IDENT_WORK_LOOP 3503 : OMP_IDENT_WORK_SECTIONS); 3504 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3505 llvm::FunctionCallee StaticInitFunction = 3506 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3507 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3508 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 3509 } 3510 3511 void CGOpenMPRuntime::emitDistributeStaticInit( 3512 CodeGenFunction &CGF, SourceLocation Loc, 3513 OpenMPDistScheduleClauseKind SchedKind, 3514 const CGOpenMPRuntime::StaticRTInput &Values) { 3515 OpenMPSchedType ScheduleNum = 3516 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 3517 llvm::Value *UpdatedLocation = 3518 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 3519 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3520 llvm::FunctionCallee StaticInitFunction = 3521 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3522 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3523 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 3524 OMPC_SCHEDULE_MODIFIER_unknown, Values); 3525 } 3526 3527 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 3528 SourceLocation Loc, 3529 OpenMPDirectiveKind DKind) { 3530 if (!CGF.HaveInsertPoint()) 3531 return; 3532 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 3533 llvm::Value *Args[] = { 3534 emitUpdateLocation(CGF, Loc, 3535 isOpenMPDistributeDirective(DKind) 3536 ? OMP_IDENT_WORK_DISTRIBUTE 3537 : isOpenMPLoopDirective(DKind) 3538 ? 
OMP_IDENT_WORK_LOOP 3539 : OMP_IDENT_WORK_SECTIONS), 3540 getThreadID(CGF, Loc)}; 3541 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 3542 Args); 3543 } 3544 3545 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 3546 SourceLocation Loc, 3547 unsigned IVSize, 3548 bool IVSigned) { 3549 if (!CGF.HaveInsertPoint()) 3550 return; 3551 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 3552 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3553 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 3554 } 3555 3556 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 3557 SourceLocation Loc, unsigned IVSize, 3558 bool IVSigned, Address IL, 3559 Address LB, Address UB, 3560 Address ST) { 3561 // Call __kmpc_dispatch_next( 3562 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 3563 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 3564 // kmp_int[32|64] *p_stride); 3565 llvm::Value *Args[] = { 3566 emitUpdateLocation(CGF, Loc), 3567 getThreadID(CGF, Loc), 3568 IL.getPointer(), // &isLastIter 3569 LB.getPointer(), // &Lower 3570 UB.getPointer(), // &Upper 3571 ST.getPointer() // &Stride 3572 }; 3573 llvm::Value *Call = 3574 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 3575 return CGF.EmitScalarConversion( 3576 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 3577 CGF.getContext().BoolTy, Loc); 3578 } 3579 3580 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 3581 llvm::Value *NumThreads, 3582 SourceLocation Loc) { 3583 if (!CGF.HaveInsertPoint()) 3584 return; 3585 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 3586 llvm::Value *Args[] = { 3587 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3588 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 3589 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 3590 Args); 3591 
} 3592 3593 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 3594 OpenMPProcBindClauseKind ProcBind, 3595 SourceLocation Loc) { 3596 if (!CGF.HaveInsertPoint()) 3597 return; 3598 // Constants for proc bind value accepted by the runtime. 3599 enum ProcBindTy { 3600 ProcBindFalse = 0, 3601 ProcBindTrue, 3602 ProcBindMaster, 3603 ProcBindClose, 3604 ProcBindSpread, 3605 ProcBindIntel, 3606 ProcBindDefault 3607 } RuntimeProcBind; 3608 switch (ProcBind) { 3609 case OMPC_PROC_BIND_master: 3610 RuntimeProcBind = ProcBindMaster; 3611 break; 3612 case OMPC_PROC_BIND_close: 3613 RuntimeProcBind = ProcBindClose; 3614 break; 3615 case OMPC_PROC_BIND_spread: 3616 RuntimeProcBind = ProcBindSpread; 3617 break; 3618 case OMPC_PROC_BIND_unknown: 3619 llvm_unreachable("Unsupported proc_bind value."); 3620 } 3621 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 3622 llvm::Value *Args[] = { 3623 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3624 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 3625 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 3626 } 3627 3628 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 3629 SourceLocation Loc) { 3630 if (!CGF.HaveInsertPoint()) 3631 return; 3632 // Build call void __kmpc_flush(ident_t *loc) 3633 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 3634 emitUpdateLocation(CGF, Loc)); 3635 } 3636 3637 namespace { 3638 /// Indexes of fields for type kmp_task_t. 3639 enum KmpTaskTFields { 3640 /// List of shared variables. 3641 KmpTaskTShareds, 3642 /// Task routine. 3643 KmpTaskTRoutine, 3644 /// Partition id for the untied tasks. 3645 KmpTaskTPartId, 3646 /// Function with call of destructors for private variables. 3647 Data1, 3648 /// Task priority. 3649 Data2, 3650 /// (Taskloops only) Lower bound. 3651 KmpTaskTLowerBound, 3652 /// (Taskloops only) Upper bound. 
3653 KmpTaskTUpperBound, 3654 /// (Taskloops only) Stride. 3655 KmpTaskTStride, 3656 /// (Taskloops only) Is last iteration flag. 3657 KmpTaskTLastIter, 3658 /// (Taskloops only) Reduction data. 3659 KmpTaskTReductions, 3660 }; 3661 } // anonymous namespace 3662 3663 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 3664 return OffloadEntriesTargetRegion.empty() && 3665 OffloadEntriesDeviceGlobalVar.empty(); 3666 } 3667 3668 /// Initialize target region entry. 3669 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3670 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3671 StringRef ParentName, unsigned LineNum, 3672 unsigned Order) { 3673 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3674 "only required for the device " 3675 "code generation."); 3676 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3677 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3678 OMPTargetRegionEntryTargetRegion); 3679 ++OffloadingEntriesNum; 3680 } 3681 3682 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3683 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3684 StringRef ParentName, unsigned LineNum, 3685 llvm::Constant *Addr, llvm::Constant *ID, 3686 OMPTargetRegionEntryKind Flags) { 3687 // If we are emitting code for a target, the entry is already initialized, 3688 // only has to be registered. 
3689 if (CGM.getLangOpts().OpenMPIsDevice) { 3690 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { 3691 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3692 DiagnosticsEngine::Error, 3693 "Unable to find target region on line '%0' in the device code."); 3694 CGM.getDiags().Report(DiagID) << LineNum; 3695 return; 3696 } 3697 auto &Entry = 3698 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3699 assert(Entry.isValid() && "Entry not initialized!"); 3700 Entry.setAddress(Addr); 3701 Entry.setID(ID); 3702 Entry.setFlags(Flags); 3703 } else { 3704 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3705 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3706 ++OffloadingEntriesNum; 3707 } 3708 } 3709 3710 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3711 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3712 unsigned LineNum) const { 3713 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3714 if (PerDevice == OffloadEntriesTargetRegion.end()) 3715 return false; 3716 auto PerFile = PerDevice->second.find(FileID); 3717 if (PerFile == PerDevice->second.end()) 3718 return false; 3719 auto PerParentName = PerFile->second.find(ParentName); 3720 if (PerParentName == PerFile->second.end()) 3721 return false; 3722 auto PerLine = PerParentName->second.find(LineNum); 3723 if (PerLine == PerParentName->second.end()) 3724 return false; 3725 // Fail if this entry is already registered. 3726 if (PerLine->second.getAddress() || PerLine->second.getID()) 3727 return false; 3728 return true; 3729 } 3730 3731 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3732 const OffloadTargetRegionEntryInfoActTy &Action) { 3733 // Scan all target region entries and perform the provided action. 
3734 for (const auto &D : OffloadEntriesTargetRegion) 3735 for (const auto &F : D.second) 3736 for (const auto &P : F.second) 3737 for (const auto &L : P.second) 3738 Action(D.first, F.first, P.first(), L.first, L.second); 3739 } 3740 3741 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3742 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3743 OMPTargetGlobalVarEntryKind Flags, 3744 unsigned Order) { 3745 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3746 "only required for the device " 3747 "code generation."); 3748 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3749 ++OffloadingEntriesNum; 3750 } 3751 3752 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3753 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3754 CharUnits VarSize, 3755 OMPTargetGlobalVarEntryKind Flags, 3756 llvm::GlobalValue::LinkageTypes Linkage) { 3757 if (CGM.getLangOpts().OpenMPIsDevice) { 3758 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3759 assert(Entry.isValid() && Entry.getFlags() == Flags && 3760 "Entry not initialized!"); 3761 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3762 "Resetting with the new address."); 3763 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) 3764 return; 3765 Entry.setAddress(Addr); 3766 Entry.setVarSize(VarSize); 3767 Entry.setLinkage(Linkage); 3768 } else { 3769 if (hasDeviceGlobalVarEntryInfo(VarName)) 3770 return; 3771 OffloadEntriesDeviceGlobalVar.try_emplace( 3772 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3773 ++OffloadingEntriesNum; 3774 } 3775 } 3776 3777 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3778 actOnDeviceGlobalVarEntriesInfo( 3779 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3780 // Scan all target region entries and perform the provided action. 
3781 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3782 Action(E.getKey(), E.getValue()); 3783 } 3784 3785 llvm::Function * 3786 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { 3787 // If we don't have entries or if we are emitting code for the device, we 3788 // don't need to do anything. 3789 if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) 3790 return nullptr; 3791 3792 llvm::Module &M = CGM.getModule(); 3793 ASTContext &C = CGM.getContext(); 3794 3795 // Get list of devices we care about 3796 const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples; 3797 3798 // We should be creating an offloading descriptor only if there are devices 3799 // specified. 3800 assert(!Devices.empty() && "No OpenMP offloading devices??"); 3801 3802 // Create the external variables that will point to the begin and end of the 3803 // host entries section. These will be defined by the linker. 3804 llvm::Type *OffloadEntryTy = 3805 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); 3806 std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"}); 3807 auto *HostEntriesBegin = new llvm::GlobalVariable( 3808 M, OffloadEntryTy, /*isConstant=*/true, 3809 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 3810 EntriesBeginName); 3811 std::string EntriesEndName = getName({"omp_offloading", "entries_end"}); 3812 auto *HostEntriesEnd = 3813 new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true, 3814 llvm::GlobalValue::ExternalLinkage, 3815 /*Initializer=*/nullptr, EntriesEndName); 3816 3817 // Create all device images 3818 auto *DeviceImageTy = cast<llvm::StructType>( 3819 CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); 3820 ConstantInitBuilder DeviceImagesBuilder(CGM); 3821 ConstantArrayBuilder DeviceImagesEntries = 3822 DeviceImagesBuilder.beginArray(DeviceImageTy); 3823 3824 for (const llvm::Triple &Device : Devices) { 3825 StringRef T = Device.getTriple(); 3826 
std::string BeginName = getName({"omp_offloading", "img_start", ""}); 3827 auto *ImgBegin = new llvm::GlobalVariable( 3828 M, CGM.Int8Ty, /*isConstant=*/true, 3829 llvm::GlobalValue::ExternalWeakLinkage, 3830 /*Initializer=*/nullptr, Twine(BeginName).concat(T)); 3831 std::string EndName = getName({"omp_offloading", "img_end", ""}); 3832 auto *ImgEnd = new llvm::GlobalVariable( 3833 M, CGM.Int8Ty, /*isConstant=*/true, 3834 llvm::GlobalValue::ExternalWeakLinkage, 3835 /*Initializer=*/nullptr, Twine(EndName).concat(T)); 3836 3837 llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin, 3838 HostEntriesEnd}; 3839 createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data, 3840 DeviceImagesEntries); 3841 } 3842 3843 // Create device images global array. 3844 std::string ImagesName = getName({"omp_offloading", "device_images"}); 3845 llvm::GlobalVariable *DeviceImages = 3846 DeviceImagesEntries.finishAndCreateGlobal(ImagesName, 3847 CGM.getPointerAlign(), 3848 /*isConstant=*/true); 3849 DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3850 3851 // This is a Zero array to be used in the creation of the constant expressions 3852 llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), 3853 llvm::Constant::getNullValue(CGM.Int32Ty)}; 3854 3855 // Create the target region descriptor. 3856 llvm::Constant *Data[] = { 3857 llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()), 3858 llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), 3859 DeviceImages, Index), 3860 HostEntriesBegin, HostEntriesEnd}; 3861 std::string Descriptor = getName({"omp_offloading", "descriptor"}); 3862 llvm::GlobalVariable *Desc = createGlobalStruct( 3863 CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor); 3864 3865 // Emit code to register or unregister the descriptor at execution 3866 // startup or closing, respectively. 
3867 3868 llvm::Function *UnRegFn; 3869 { 3870 FunctionArgList Args; 3871 ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); 3872 Args.push_back(&DummyPtr); 3873 3874 CodeGenFunction CGF(CGM); 3875 // Disable debug info for global (de-)initializer because they are not part 3876 // of some particular construct. 3877 CGF.disableDebugInfo(); 3878 const auto &FI = 3879 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3880 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 3881 std::string UnregName = getName({"omp_offloading", "descriptor_unreg"}); 3882 UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI); 3883 CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args); 3884 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), 3885 Desc); 3886 CGF.FinishFunction(); 3887 } 3888 llvm::Function *RegFn; 3889 { 3890 CodeGenFunction CGF(CGM); 3891 // Disable debug info for global (de-)initializer because they are not part 3892 // of some particular construct. 3893 CGF.disableDebugInfo(); 3894 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 3895 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 3896 3897 // Encode offload target triples into the registration function name. It 3898 // will serve as a comdat key for the registration/unregistration code for 3899 // this particular combination of offloading targets. 
3900 SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U); 3901 RegFnNameParts[0] = "omp_offloading"; 3902 RegFnNameParts[1] = "descriptor_reg"; 3903 llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2), 3904 [](const llvm::Triple &T) -> const std::string& { 3905 return T.getTriple(); 3906 }); 3907 llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end()); 3908 std::string Descriptor = getName(RegFnNameParts); 3909 RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI); 3910 CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList()); 3911 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); 3912 // Create a variable to drive the registration and unregistration of the 3913 // descriptor, so we can reuse the logic that emits Ctors and Dtors. 3914 ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), 3915 SourceLocation(), nullptr, C.CharTy, 3916 ImplicitParamDecl::Other); 3917 CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); 3918 CGF.FinishFunction(); 3919 } 3920 if (CGM.supportsCOMDAT()) { 3921 // It is sufficient to call registration function only once, so create a 3922 // COMDAT group for registration/unregistration functions and associated 3923 // data. That would reduce startup time and code size. Registration 3924 // function serves as a COMDAT group key. 
3925 llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName()); 3926 RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); 3927 RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); 3928 RegFn->setComdat(ComdatKey); 3929 UnRegFn->setComdat(ComdatKey); 3930 DeviceImages->setComdat(ComdatKey); 3931 Desc->setComdat(ComdatKey); 3932 } 3933 return RegFn; 3934 } 3935 3936 void CGOpenMPRuntime::createOffloadEntry( 3937 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3938 llvm::GlobalValue::LinkageTypes Linkage) { 3939 StringRef Name = Addr->getName(); 3940 llvm::Module &M = CGM.getModule(); 3941 llvm::LLVMContext &C = M.getContext(); 3942 3943 // Create constant string with the name. 3944 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3945 3946 std::string StringName = getName({"omp_offloading", "entry_name"}); 3947 auto *Str = new llvm::GlobalVariable( 3948 M, StrPtrInit->getType(), /*isConstant=*/true, 3949 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3950 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3951 3952 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 3953 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 3954 llvm::ConstantInt::get(CGM.SizeTy, Size), 3955 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3956 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3957 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3958 llvm::GlobalVariable *Entry = createGlobalStruct( 3959 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 3960 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 3961 3962 // The entry has to be created in the section the linker expects it to be. 
3963 std::string Section = getName({"omp_offloading", "entries"}); 3964 Entry->setSection(Section); 3965 } 3966 3967 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3968 // Emit the offloading entries and metadata so that the device codegen side 3969 // can easily figure out what to emit. The produced metadata looks like 3970 // this: 3971 // 3972 // !omp_offload.info = !{!1, ...} 3973 // 3974 // Right now we only generate metadata for function that contain target 3975 // regions. 3976 3977 // If we do not have entries, we don't need to do anything. 3978 if (OffloadEntriesInfoManager.empty()) 3979 return; 3980 3981 llvm::Module &M = CGM.getModule(); 3982 llvm::LLVMContext &C = M.getContext(); 3983 SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> 3984 OrderedEntries(OffloadEntriesInfoManager.size()); 3985 llvm::SmallVector<StringRef, 16> ParentFunctions( 3986 OffloadEntriesInfoManager.size()); 3987 3988 // Auxiliary methods to create metadata values and strings. 3989 auto &&GetMDInt = [this](unsigned V) { 3990 return llvm::ConstantAsMetadata::get( 3991 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3992 }; 3993 3994 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3995 3996 // Create the offloading info metadata node. 3997 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3998 3999 // Create function that emits metadata for each target region entry; 4000 auto &&TargetRegionMetadataEmitter = 4001 [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString]( 4002 unsigned DeviceID, unsigned FileID, StringRef ParentName, 4003 unsigned Line, 4004 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 4005 // Generate metadata for target regions. Each entry of this metadata 4006 // contains: 4007 // - Entry 0 -> Kind of this type of metadata (0). 4008 // - Entry 1 -> Device ID of the file where the entry was identified. 
4009 // - Entry 2 -> File ID of the file where the entry was identified. 4010 // - Entry 3 -> Mangled name of the function where the entry was 4011 // identified. 4012 // - Entry 4 -> Line in the file where the entry was identified. 4013 // - Entry 5 -> Order the entry was created. 4014 // The first element of the metadata node is the kind. 4015 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 4016 GetMDInt(FileID), GetMDString(ParentName), 4017 GetMDInt(Line), GetMDInt(E.getOrder())}; 4018 4019 // Save this entry in the right position of the ordered entries array. 4020 OrderedEntries[E.getOrder()] = &E; 4021 ParentFunctions[E.getOrder()] = ParentName; 4022 4023 // Add metadata to the named metadata node. 4024 MD->addOperand(llvm::MDNode::get(C, Ops)); 4025 }; 4026 4027 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 4028 TargetRegionMetadataEmitter); 4029 4030 // Create function that emits metadata for each device global variable entry; 4031 auto &&DeviceGlobalVarMetadataEmitter = 4032 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 4033 MD](StringRef MangledName, 4034 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 4035 &E) { 4036 // Generate metadata for global variables. Each entry of this metadata 4037 // contains: 4038 // - Entry 0 -> Kind of this type of metadata (1). 4039 // - Entry 1 -> Mangled name of the variable. 4040 // - Entry 2 -> Declare target kind. 4041 // - Entry 3 -> Order the entry was created. 4042 // The first element of the metadata node is the kind. 4043 llvm::Metadata *Ops[] = { 4044 GetMDInt(E.getKind()), GetMDString(MangledName), 4045 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 4046 4047 // Save this entry in the right position of the ordered entries array. 4048 OrderedEntries[E.getOrder()] = &E; 4049 4050 // Add metadata to the named metadata node. 
4051 MD->addOperand(llvm::MDNode::get(C, Ops)); 4052 }; 4053 4054 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 4055 DeviceGlobalVarMetadataEmitter); 4056 4057 for (const auto *E : OrderedEntries) { 4058 assert(E && "All ordered entries must exist!"); 4059 if (const auto *CE = 4060 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 4061 E)) { 4062 if (!CE->getID() || !CE->getAddress()) { 4063 // Do not blame the entry if the parent funtion is not emitted. 4064 StringRef FnName = ParentFunctions[CE->getOrder()]; 4065 if (!CGM.GetGlobalValue(FnName)) 4066 continue; 4067 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4068 DiagnosticsEngine::Error, 4069 "Offloading entry for target region is incorrect: either the " 4070 "address or the ID is invalid."); 4071 CGM.getDiags().Report(DiagID); 4072 continue; 4073 } 4074 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 4075 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 4076 } else if (const auto *CE = 4077 dyn_cast<OffloadEntriesInfoManagerTy:: 4078 OffloadEntryInfoDeviceGlobalVar>(E)) { 4079 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 4080 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4081 CE->getFlags()); 4082 switch (Flags) { 4083 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 4084 if (!CE->getAddress()) { 4085 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4086 DiagnosticsEngine::Error, 4087 "Offloading entry for declare target variable is incorrect: the " 4088 "address is invalid."); 4089 CGM.getDiags().Report(DiagID); 4090 continue; 4091 } 4092 // The vaiable has no definition - no need to add the entry. 
4093 if (CE->getVarSize().isZero()) 4094 continue; 4095 break; 4096 } 4097 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 4098 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 4099 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 4100 "Declaret target link address is set."); 4101 if (CGM.getLangOpts().OpenMPIsDevice) 4102 continue; 4103 if (!CE->getAddress()) { 4104 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4105 DiagnosticsEngine::Error, 4106 "Offloading entry for declare target variable is incorrect: the " 4107 "address is invalid."); 4108 CGM.getDiags().Report(DiagID); 4109 continue; 4110 } 4111 break; 4112 } 4113 createOffloadEntry(CE->getAddress(), CE->getAddress(), 4114 CE->getVarSize().getQuantity(), Flags, 4115 CE->getLinkage()); 4116 } else { 4117 llvm_unreachable("Unsupported entry kind."); 4118 } 4119 } 4120 } 4121 4122 /// Loads all the offload entries information from the host IR 4123 /// metadata. 4124 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 4125 // If we are in target mode, load the metadata from the host IR. This code has 4126 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
4127 4128 if (!CGM.getLangOpts().OpenMPIsDevice) 4129 return; 4130 4131 if (CGM.getLangOpts().OMPHostIRFile.empty()) 4132 return; 4133 4134 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 4135 if (auto EC = Buf.getError()) { 4136 CGM.getDiags().Report(diag::err_cannot_open_file) 4137 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4138 return; 4139 } 4140 4141 llvm::LLVMContext C; 4142 auto ME = expectedToErrorOrAndEmitErrors( 4143 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 4144 4145 if (auto EC = ME.getError()) { 4146 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4147 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 4148 CGM.getDiags().Report(DiagID) 4149 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4150 return; 4151 } 4152 4153 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 4154 if (!MD) 4155 return; 4156 4157 for (llvm::MDNode *MN : MD->operands()) { 4158 auto &&GetMDInt = [MN](unsigned Idx) { 4159 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 4160 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 4161 }; 4162 4163 auto &&GetMDString = [MN](unsigned Idx) { 4164 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 4165 return V->getString(); 4166 }; 4167 4168 switch (GetMDInt(0)) { 4169 default: 4170 llvm_unreachable("Unexpected metadata!"); 4171 break; 4172 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4173 OffloadingEntryInfoTargetRegion: 4174 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 4175 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 4176 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 4177 /*Order=*/GetMDInt(5)); 4178 break; 4179 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4180 OffloadingEntryInfoDeviceGlobalVar: 4181 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 4182 /*MangledName=*/GetMDString(1), 4183 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4184 /*Flags=*/GetMDInt(2)), 4185 /*Order=*/GetMDInt(3)); 4186 break; 4187 } 4188 } 4189 } 4190 4191 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 4192 if (!KmpRoutineEntryPtrTy) { 4193 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 4194 ASTContext &C = CGM.getContext(); 4195 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 4196 FunctionProtoType::ExtProtoInfo EPI; 4197 KmpRoutineEntryPtrQTy = C.getPointerType( 4198 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 4199 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 4200 } 4201 } 4202 4203 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 4204 // Make sure the type of the entry is already created. This is the type we 4205 // have to create: 4206 // struct __tgt_offload_entry{ 4207 // void *addr; // Pointer to the offload entry info. 4208 // // (function or global) 4209 // char *name; // Name of the function or global. 4210 // size_t size; // Size of the entry info (0 if it a function). 4211 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 4212 // int32_t reserved; // Reserved, to use by the runtime library. 
4213 // }; 4214 if (TgtOffloadEntryQTy.isNull()) { 4215 ASTContext &C = CGM.getContext(); 4216 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 4217 RD->startDefinition(); 4218 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4219 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 4220 addFieldToRecordDecl(C, RD, C.getSizeType()); 4221 addFieldToRecordDecl( 4222 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4223 addFieldToRecordDecl( 4224 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4225 RD->completeDefinition(); 4226 RD->addAttr(PackedAttr::CreateImplicit(C)); 4227 TgtOffloadEntryQTy = C.getRecordType(RD); 4228 } 4229 return TgtOffloadEntryQTy; 4230 } 4231 4232 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { 4233 // These are the types we need to build: 4234 // struct __tgt_device_image{ 4235 // void *ImageStart; // Pointer to the target code start. 4236 // void *ImageEnd; // Pointer to the target code end. 4237 // // We also add the host entries to the device image, as it may be useful 4238 // // for the target runtime to have access to that information. 4239 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all 4240 // // the entries. 4241 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4242 // // entries (non inclusive). 
4243 // }; 4244 if (TgtDeviceImageQTy.isNull()) { 4245 ASTContext &C = CGM.getContext(); 4246 RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image"); 4247 RD->startDefinition(); 4248 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4249 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4250 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4251 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4252 RD->completeDefinition(); 4253 TgtDeviceImageQTy = C.getRecordType(RD); 4254 } 4255 return TgtDeviceImageQTy; 4256 } 4257 4258 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { 4259 // struct __tgt_bin_desc{ 4260 // int32_t NumDevices; // Number of devices supported. 4261 // __tgt_device_image *DeviceImages; // Arrays of device images 4262 // // (one per device). 4263 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the 4264 // // entries. 4265 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4266 // // entries (non inclusive). 
4267 // }; 4268 if (TgtBinaryDescriptorQTy.isNull()) { 4269 ASTContext &C = CGM.getContext(); 4270 RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc"); 4271 RD->startDefinition(); 4272 addFieldToRecordDecl( 4273 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4274 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); 4275 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4276 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4277 RD->completeDefinition(); 4278 TgtBinaryDescriptorQTy = C.getRecordType(RD); 4279 } 4280 return TgtBinaryDescriptorQTy; 4281 } 4282 4283 namespace { 4284 struct PrivateHelpersTy { 4285 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 4286 const VarDecl *PrivateElemInit) 4287 : Original(Original), PrivateCopy(PrivateCopy), 4288 PrivateElemInit(PrivateElemInit) {} 4289 const VarDecl *Original; 4290 const VarDecl *PrivateCopy; 4291 const VarDecl *PrivateElemInit; 4292 }; 4293 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 4294 } // anonymous namespace 4295 4296 static RecordDecl * 4297 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 4298 if (!Privates.empty()) { 4299 ASTContext &C = CGM.getContext(); 4300 // Build struct .kmp_privates_t. 
{ 4301 // /* private vars */ 4302 // }; 4303 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 4304 RD->startDefinition(); 4305 for (const auto &Pair : Privates) { 4306 const VarDecl *VD = Pair.second.Original; 4307 QualType Type = VD->getType().getNonReferenceType(); 4308 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 4309 if (VD->hasAttrs()) { 4310 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 4311 E(VD->getAttrs().end()); 4312 I != E; ++I) 4313 FD->addAttr(*I); 4314 } 4315 } 4316 RD->completeDefinition(); 4317 return RD; 4318 } 4319 return nullptr; 4320 } 4321 4322 static RecordDecl * 4323 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 4324 QualType KmpInt32Ty, 4325 QualType KmpRoutineEntryPointerQTy) { 4326 ASTContext &C = CGM.getContext(); 4327 // Build struct kmp_task_t { 4328 // void * shareds; 4329 // kmp_routine_entry_t routine; 4330 // kmp_int32 part_id; 4331 // kmp_cmplrdata_t data1; 4332 // kmp_cmplrdata_t data2; 4333 // For taskloops additional fields: 4334 // kmp_uint64 lb; 4335 // kmp_uint64 ub; 4336 // kmp_int64 st; 4337 // kmp_int32 liter; 4338 // void * reductions; 4339 // }; 4340 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 4341 UD->startDefinition(); 4342 addFieldToRecordDecl(C, UD, KmpInt32Ty); 4343 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 4344 UD->completeDefinition(); 4345 QualType KmpCmplrdataTy = C.getRecordType(UD); 4346 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 4347 RD->startDefinition(); 4348 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4349 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 4350 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4351 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4352 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4353 if (isOpenMPTaskLoopDirective(Kind)) { 4354 QualType KmpUInt64Ty = 4355 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 4356 QualType KmpInt64Ty = 4357 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 4358 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4359 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4360 addFieldToRecordDecl(C, RD, KmpInt64Ty); 4361 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4362 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4363 } 4364 RD->completeDefinition(); 4365 return RD; 4366 } 4367 4368 static RecordDecl * 4369 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 4370 ArrayRef<PrivateDataTy> Privates) { 4371 ASTContext &C = CGM.getContext(); 4372 // Build struct kmp_task_t_with_privates { 4373 // kmp_task_t task_data; 4374 // .kmp_privates_t. privates; 4375 // }; 4376 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 4377 RD->startDefinition(); 4378 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 4379 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 4380 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 4381 RD->completeDefinition(); 4382 return RD; 4383 } 4384 4385 /// Emit a proxy function which accepts kmp_task_t as the second 4386 /// argument. 
4387 /// \code 4388 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 4389 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 4390 /// For taskloops: 4391 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4392 /// tt->reductions, tt->shareds); 4393 /// return 0; 4394 /// } 4395 /// \endcode 4396 static llvm::Function * 4397 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 4398 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 4399 QualType KmpTaskTWithPrivatesPtrQTy, 4400 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 4401 QualType SharedsPtrTy, llvm::Function *TaskFunction, 4402 llvm::Value *TaskPrivatesMap) { 4403 ASTContext &C = CGM.getContext(); 4404 FunctionArgList Args; 4405 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4406 ImplicitParamDecl::Other); 4407 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4408 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4409 ImplicitParamDecl::Other); 4410 Args.push_back(&GtidArg); 4411 Args.push_back(&TaskTypeArg); 4412 const auto &TaskEntryFnInfo = 4413 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4414 llvm::FunctionType *TaskEntryTy = 4415 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 4416 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 4417 auto *TaskEntry = llvm::Function::Create( 4418 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4419 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 4420 TaskEntry->setDoesNotRecurse(); 4421 CodeGenFunction CGF(CGM); 4422 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 4423 Loc, Loc); 4424 4425 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 4426 // tt, 4427 // For taskloops: 4428 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4429 // 
tt->task_data.shareds); 4430 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 4431 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 4432 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4433 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4434 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4435 const auto *KmpTaskTWithPrivatesQTyRD = 4436 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4437 LValue Base = 4438 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4439 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4440 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 4441 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 4442 llvm::Value *PartidParam = PartIdLVal.getPointer(); 4443 4444 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 4445 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 4446 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4447 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 4448 CGF.ConvertTypeForMem(SharedsPtrTy)); 4449 4450 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4451 llvm::Value *PrivatesParam; 4452 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 4453 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 4454 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4455 PrivatesLVal.getPointer(), CGF.VoidPtrTy); 4456 } else { 4457 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4458 } 4459 4460 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 4461 TaskPrivatesMap, 4462 CGF.Builder 4463 .CreatePointerBitCastOrAddrSpaceCast( 4464 TDBase.getAddress(), CGF.VoidPtrTy) 4465 .getPointer()}; 4466 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 4467 std::end(CommonArgs)); 4468 if (isOpenMPTaskLoopDirective(Kind)) { 4469 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), 
KmpTaskTLowerBound); 4470 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 4471 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 4472 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 4473 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 4474 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 4475 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 4476 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 4477 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 4478 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4479 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4480 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 4481 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 4482 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 4483 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 4484 CallArgs.push_back(LBParam); 4485 CallArgs.push_back(UBParam); 4486 CallArgs.push_back(StParam); 4487 CallArgs.push_back(LIParam); 4488 CallArgs.push_back(RParam); 4489 } 4490 CallArgs.push_back(SharedsParam); 4491 4492 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 4493 CallArgs); 4494 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 4495 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 4496 CGF.FinishFunction(); 4497 return TaskEntry; 4498 } 4499 4500 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 4501 SourceLocation Loc, 4502 QualType KmpInt32Ty, 4503 QualType KmpTaskTWithPrivatesPtrQTy, 4504 QualType KmpTaskTWithPrivatesQTy) { 4505 ASTContext &C = CGM.getContext(); 4506 FunctionArgList Args; 4507 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4508 ImplicitParamDecl::Other); 4509 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4510 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4511 ImplicitParamDecl::Other); 4512 
Args.push_back(&GtidArg); 4513 Args.push_back(&TaskTypeArg); 4514 const auto &DestructorFnInfo = 4515 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4516 llvm::FunctionType *DestructorFnTy = 4517 CGM.getTypes().GetFunctionType(DestructorFnInfo); 4518 std::string Name = 4519 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 4520 auto *DestructorFn = 4521 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 4522 Name, &CGM.getModule()); 4523 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 4524 DestructorFnInfo); 4525 DestructorFn->setDoesNotRecurse(); 4526 CodeGenFunction CGF(CGM); 4527 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 4528 Args, Loc, Loc); 4529 4530 LValue Base = CGF.EmitLoadOfPointerLValue( 4531 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4532 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4533 const auto *KmpTaskTWithPrivatesQTyRD = 4534 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4535 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4536 Base = CGF.EmitLValueForField(Base, *FI); 4537 for (const auto *Field : 4538 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 4539 if (QualType::DestructionKind DtorKind = 4540 Field->getType().isDestructedType()) { 4541 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 4542 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 4543 } 4544 } 4545 CGF.FinishFunction(); 4546 return DestructorFn; 4547 } 4548 4549 /// Emit a privates mapping function for correct handling of private and 4550 /// firstprivate variables. 4551 /// \code 4552 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 4553 /// **noalias priv1,..., <tyn> **noalias privn) { 4554 /// *priv1 = &.privates.priv1; 4555 /// ...; 4556 /// *privn = &.privates.privn; 4557 /// } 4558 /// \endcode 4559 static llvm::Value * 4560 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 4561 ArrayRef<const Expr *> PrivateVars, 4562 ArrayRef<const Expr *> FirstprivateVars, 4563 ArrayRef<const Expr *> LastprivateVars, 4564 QualType PrivatesQTy, 4565 ArrayRef<PrivateDataTy> Privates) { 4566 ASTContext &C = CGM.getContext(); 4567 FunctionArgList Args; 4568 ImplicitParamDecl TaskPrivatesArg( 4569 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4570 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 4571 ImplicitParamDecl::Other); 4572 Args.push_back(&TaskPrivatesArg); 4573 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 4574 unsigned Counter = 1; 4575 for (const Expr *E : PrivateVars) { 4576 Args.push_back(ImplicitParamDecl::Create( 4577 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4578 C.getPointerType(C.getPointerType(E->getType())) 4579 .withConst() 4580 .withRestrict(), 4581 ImplicitParamDecl::Other)); 4582 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4583 PrivateVarsPos[VD] = Counter; 4584 ++Counter; 4585 } 4586 for (const Expr *E : FirstprivateVars) { 4587 Args.push_back(ImplicitParamDecl::Create( 4588 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4589 C.getPointerType(C.getPointerType(E->getType())) 4590 .withConst() 4591 .withRestrict(), 4592 ImplicitParamDecl::Other)); 4593 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4594 PrivateVarsPos[VD] = Counter; 4595 ++Counter; 4596 } 4597 for (const Expr *E : LastprivateVars) { 4598 Args.push_back(ImplicitParamDecl::Create( 4599 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4600 C.getPointerType(C.getPointerType(E->getType())) 4601 .withConst() 4602 .withRestrict(), 4603 ImplicitParamDecl::Other)); 4604 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4605 
PrivateVarsPos[VD] = Counter; 4606 ++Counter; 4607 } 4608 const auto &TaskPrivatesMapFnInfo = 4609 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4610 llvm::FunctionType *TaskPrivatesMapTy = 4611 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 4612 std::string Name = 4613 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 4614 auto *TaskPrivatesMap = llvm::Function::Create( 4615 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 4616 &CGM.getModule()); 4617 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 4618 TaskPrivatesMapFnInfo); 4619 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 4620 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 4621 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 4622 CodeGenFunction CGF(CGM); 4623 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 4624 TaskPrivatesMapFnInfo, Args, Loc, Loc); 4625 4626 // *privi = &.privates.privi; 4627 LValue Base = CGF.EmitLoadOfPointerLValue( 4628 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 4629 TaskPrivatesArg.getType()->castAs<PointerType>()); 4630 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 4631 Counter = 0; 4632 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 4633 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 4634 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4635 LValue RefLVal = 4636 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4637 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4638 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 4639 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 4640 ++Counter; 4641 } 4642 CGF.FinishFunction(); 4643 return TaskPrivatesMap; 4644 } 4645 4646 static bool stable_sort_comparator(const PrivateDataTy P1, 4647 const PrivateDataTy P2) { 4648 return P1.first > P2.first; 4649 } 4650 4651 /// Emit initialization for private variables in 
/// task-based directives.
///
/// Walks \p Privates in lock-step with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD) and emits the initializer of
/// each private copy that has one.
///
/// \param KmpTaskSharedsPtr Address of the shareds block that firstprivate
/// values are read from; may be invalid when nothing has to be read from it.
/// \param TDBase LValue of the record described by
/// \p KmpTaskTWithPrivatesQTyRD.
/// \param SharedsTy Type of the shareds block.
/// \param SharedsPtrTy Pointer type \p KmpTaskSharedsPtr is cast to.
/// \param Privates Private data, ordered like the fields of the privates
/// record.
/// \param ForDup When true, only privates whose initializer is a non-trivial
/// constructor call are initialized; all other entries are skipped.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // FI first names the privates field of the task record ...
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Captures are looked up in the 'taskloop' or 'task' captured statement of
  // the directive.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  // SrcBase stays default-constructed unless shareds have to be read below.
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // ... and from here on iterates over the fields inside the privates record.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Skip copies without an initializer. For the dup function also skip
    // everything except non-trivial constructor calls.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit means the initializer reads the original
      // value through the helper variable Elem (set for firstprivates).
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = OriginalVD->getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          // Not captured: use the address of the local variable directly.
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          // Captured: address the matching shareds field, then rebuild the
          // lvalue with the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Redirect Elem to the current source element.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: redirect Elem to the shared original and
          // emit the initializer into the private copy.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No element-init helper: the initializer does not read the original.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    // Advance to the next privates field even when this entry was skipped.
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
4757 static bool checkInitIsRequired(CodeGenFunction &CGF, 4758 ArrayRef<PrivateDataTy> Privates) { 4759 bool InitRequired = false; 4760 for (const PrivateDataTy &Pair : Privates) { 4761 const VarDecl *VD = Pair.second.PrivateCopy; 4762 const Expr *Init = VD->getAnyInitializer(); 4763 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4764 !CGF.isTrivialInitializer(Init)); 4765 if (InitRequired) 4766 break; 4767 } 4768 return InitRequired; 4769 } 4770 4771 4772 /// Emit task_dup function (for initialization of 4773 /// private/firstprivate/lastprivate vars and last_iter flag) 4774 /// \code 4775 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4776 /// lastpriv) { 4777 /// // setup lastprivate flag 4778 /// task_dst->last = lastpriv; 4779 /// // could be constructor calls here... 4780 /// } 4781 /// \endcode 4782 static llvm::Value * 4783 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4784 const OMPExecutableDirective &D, 4785 QualType KmpTaskTWithPrivatesPtrQTy, 4786 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4787 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4788 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4789 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4790 ASTContext &C = CGM.getContext(); 4791 FunctionArgList Args; 4792 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4793 KmpTaskTWithPrivatesPtrQTy, 4794 ImplicitParamDecl::Other); 4795 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4796 KmpTaskTWithPrivatesPtrQTy, 4797 ImplicitParamDecl::Other); 4798 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4799 ImplicitParamDecl::Other); 4800 Args.push_back(&DstArg); 4801 Args.push_back(&SrcArg); 4802 Args.push_back(&LastprivArg); 4803 const auto &TaskDupFnInfo = 4804 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4805 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 
4806 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4807 auto *TaskDup = llvm::Function::Create( 4808 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4809 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4810 TaskDup->setDoesNotRecurse(); 4811 CodeGenFunction CGF(CGM); 4812 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4813 Loc); 4814 4815 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4816 CGF.GetAddrOfLocalVar(&DstArg), 4817 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4818 // task_dst->liter = lastpriv; 4819 if (WithLastIter) { 4820 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4821 LValue Base = CGF.EmitLValueForField( 4822 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4823 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4824 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4825 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4826 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4827 } 4828 4829 // Emit initial values for private copies (if any). 4830 assert(!Privates.empty()); 4831 Address KmpTaskSharedsPtr = Address::invalid(); 4832 if (!Data.FirstprivateVars.empty()) { 4833 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4834 CGF.GetAddrOfLocalVar(&SrcArg), 4835 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4836 LValue Base = CGF.EmitLValueForField( 4837 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4838 KmpTaskSharedsPtr = Address( 4839 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4840 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4841 KmpTaskTShareds)), 4842 Loc), 4843 CGF.getNaturalTypeAlignment(SharedsTy)); 4844 } 4845 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4846 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4847 CGF.FinishFunction(); 4848 return TaskDup; 4849 } 4850 4851 /// Checks if destructor function is required to be generated. 
4852 /// \return true if cleanups are required, false otherwise. 4853 static bool 4854 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 4855 bool NeedsCleanup = false; 4856 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4857 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 4858 for (const FieldDecl *FD : PrivateRD->fields()) { 4859 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 4860 if (NeedsCleanup) 4861 break; 4862 } 4863 return NeedsCleanup; 4864 } 4865 4866 CGOpenMPRuntime::TaskResultTy 4867 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4868 const OMPExecutableDirective &D, 4869 llvm::Function *TaskFunction, QualType SharedsTy, 4870 Address Shareds, const OMPTaskDataTy &Data) { 4871 ASTContext &C = CGM.getContext(); 4872 llvm::SmallVector<PrivateDataTy, 4> Privates; 4873 // Aggregate privates and sort them by the alignment. 4874 auto I = Data.PrivateCopies.begin(); 4875 for (const Expr *E : Data.PrivateVars) { 4876 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4877 Privates.emplace_back( 4878 C.getDeclAlign(VD), 4879 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4880 /*PrivateElemInit=*/nullptr)); 4881 ++I; 4882 } 4883 I = Data.FirstprivateCopies.begin(); 4884 auto IElemInitRef = Data.FirstprivateInits.begin(); 4885 for (const Expr *E : Data.FirstprivateVars) { 4886 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4887 Privates.emplace_back( 4888 C.getDeclAlign(VD), 4889 PrivateHelpersTy( 4890 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4891 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4892 ++I; 4893 ++IElemInitRef; 4894 } 4895 I = Data.LastprivateCopies.begin(); 4896 for (const Expr *E : Data.LastprivateVars) { 4897 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4898 Privates.emplace_back( 4899 C.getDeclAlign(VD), 4900 PrivateHelpersTy(VD, 
cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4901 /*PrivateElemInit=*/nullptr)); 4902 ++I; 4903 } 4904 std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator); 4905 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4906 // Build type kmp_routine_entry_t (if not built yet). 4907 emitKmpRoutineEntryT(KmpInt32Ty); 4908 // Build type kmp_task_t (if not built yet). 4909 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4910 if (SavedKmpTaskloopTQTy.isNull()) { 4911 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4912 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4913 } 4914 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4915 } else { 4916 assert((D.getDirectiveKind() == OMPD_task || 4917 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4918 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4919 "Expected taskloop, task or target directive"); 4920 if (SavedKmpTaskTQTy.isNull()) { 4921 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4922 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4923 } 4924 KmpTaskTQTy = SavedKmpTaskTQTy; 4925 } 4926 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4927 // Build particular struct kmp_task_t for the given task. 
4928 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4929 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4930 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4931 QualType KmpTaskTWithPrivatesPtrQTy = 4932 C.getPointerType(KmpTaskTWithPrivatesQTy); 4933 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4934 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4935 KmpTaskTWithPrivatesTy->getPointerTo(); 4936 llvm::Value *KmpTaskTWithPrivatesTySize = 4937 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4938 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4939 4940 // Emit initial values for private copies (if any). 4941 llvm::Value *TaskPrivatesMap = nullptr; 4942 llvm::Type *TaskPrivatesMapTy = 4943 std::next(TaskFunction->arg_begin(), 3)->getType(); 4944 if (!Privates.empty()) { 4945 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4946 TaskPrivatesMap = emitTaskPrivateMappingFunction( 4947 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 4948 FI->getType(), Privates); 4949 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4950 TaskPrivatesMap, TaskPrivatesMapTy); 4951 } else { 4952 TaskPrivatesMap = llvm::ConstantPointerNull::get( 4953 cast<llvm::PointerType>(TaskPrivatesMapTy)); 4954 } 4955 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 4956 // kmp_task_t *tt); 4957 llvm::Function *TaskEntry = emitProxyTaskFunction( 4958 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4959 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 4960 TaskPrivatesMap); 4961 4962 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 4963 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 4964 // kmp_routine_entry_t *task_entry); 4965 // Task flags. 
Format is taken from 4966 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 4967 // description of kmp_tasking_flags struct. 4968 enum { 4969 TiedFlag = 0x1, 4970 FinalFlag = 0x2, 4971 DestructorsFlag = 0x8, 4972 PriorityFlag = 0x20 4973 }; 4974 unsigned Flags = Data.Tied ? TiedFlag : 0; 4975 bool NeedsCleanup = false; 4976 if (!Privates.empty()) { 4977 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 4978 if (NeedsCleanup) 4979 Flags = Flags | DestructorsFlag; 4980 } 4981 if (Data.Priority.getInt()) 4982 Flags = Flags | PriorityFlag; 4983 llvm::Value *TaskFlags = 4984 Data.Final.getPointer() 4985 ? CGF.Builder.CreateSelect(Data.Final.getPointer(), 4986 CGF.Builder.getInt32(FinalFlag), 4987 CGF.Builder.getInt32(/*C=*/0)) 4988 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 4989 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 4990 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 4991 llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), 4992 getThreadID(CGF, Loc), TaskFlags, 4993 KmpTaskTWithPrivatesTySize, SharedsSize, 4994 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4995 TaskEntry, KmpRoutineEntryPtrTy)}; 4996 llvm::Value *NewTask = CGF.EmitRuntimeCall( 4997 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 4998 llvm::Value *NewTaskNewTaskTTy = 4999 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5000 NewTask, KmpTaskTWithPrivatesPtrTy); 5001 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 5002 KmpTaskTWithPrivatesQTy); 5003 LValue TDBase = 5004 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 5005 // Fill the data in the resulting kmp_task_t record. 5006 // Copy shareds if there are any. 
5007 Address KmpTaskSharedsPtr = Address::invalid(); 5008 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 5009 KmpTaskSharedsPtr = 5010 Address(CGF.EmitLoadOfScalar( 5011 CGF.EmitLValueForField( 5012 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 5013 KmpTaskTShareds)), 5014 Loc), 5015 CGF.getNaturalTypeAlignment(SharedsTy)); 5016 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 5017 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 5018 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 5019 } 5020 // Emit initial values for private copies (if any). 5021 TaskResultTy Result; 5022 if (!Privates.empty()) { 5023 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 5024 SharedsTy, SharedsPtrTy, Data, Privates, 5025 /*ForDup=*/false); 5026 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 5027 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 5028 Result.TaskDupFn = emitTaskDupFunction( 5029 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 5030 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 5031 /*WithLastIter=*/!Data.LastprivateVars.empty()); 5032 } 5033 } 5034 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 5035 enum { Priority = 0, Destructors = 1 }; 5036 // Provide pointer to function with destructors for privates. 
5037 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 5038 const RecordDecl *KmpCmplrdataUD = 5039 (*FI)->getType()->getAsUnionType()->getDecl(); 5040 if (NeedsCleanup) { 5041 llvm::Value *DestructorFn = emitDestructorsFunction( 5042 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5043 KmpTaskTWithPrivatesQTy); 5044 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 5045 LValue DestructorsLV = CGF.EmitLValueForField( 5046 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 5047 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5048 DestructorFn, KmpRoutineEntryPtrTy), 5049 DestructorsLV); 5050 } 5051 // Set priority. 5052 if (Data.Priority.getInt()) { 5053 LValue Data2LV = CGF.EmitLValueForField( 5054 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 5055 LValue PriorityLV = CGF.EmitLValueForField( 5056 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 5057 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 5058 } 5059 Result.NewTask = NewTask; 5060 Result.TaskEntry = TaskEntry; 5061 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 5062 Result.TDBase = TDBase; 5063 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 5064 return Result; 5065 } 5066 5067 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5068 const OMPExecutableDirective &D, 5069 llvm::Function *TaskFunction, 5070 QualType SharedsTy, Address Shareds, 5071 const Expr *IfCond, 5072 const OMPTaskDataTy &Data) { 5073 if (!CGF.HaveInsertPoint()) 5074 return; 5075 5076 TaskResultTy Result = 5077 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5078 llvm::Value *NewTask = Result.NewTask; 5079 llvm::Function *TaskEntry = Result.TaskEntry; 5080 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5081 LValue TDBase = Result.TDBase; 5082 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5083 ASTContext &C = CGM.getContext(); 5084 // Process list of dependences. 
5085 Address DependenciesArray = Address::invalid(); 5086 unsigned NumDependencies = Data.Dependences.size(); 5087 if (NumDependencies) { 5088 // Dependence kind for RTL. 5089 enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 }; 5090 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 5091 RecordDecl *KmpDependInfoRD; 5092 QualType FlagsTy = 5093 C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 5094 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5095 if (KmpDependInfoTy.isNull()) { 5096 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 5097 KmpDependInfoRD->startDefinition(); 5098 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 5099 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 5100 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 5101 KmpDependInfoRD->completeDefinition(); 5102 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 5103 } else { 5104 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5105 } 5106 // Define type kmp_depend_info[<Dependences.size()>]; 5107 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 5108 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), 5109 ArrayType::Normal, /*IndexTypeQuals=*/0); 5110 // kmp_depend_info[<Dependences.size()>] deps; 5111 DependenciesArray = 5112 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 5113 for (unsigned I = 0; I < NumDependencies; ++I) { 5114 const Expr *E = Data.Dependences[I].second; 5115 LValue Addr = CGF.EmitLValue(E); 5116 llvm::Value *Size; 5117 QualType Ty = E->getType(); 5118 if (const auto *ASE = 5119 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 5120 LValue UpAddrLVal = 5121 CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false); 5122 llvm::Value *UpAddr = 5123 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); 5124 llvm::Value *LowIntPtr = 5125 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); 5126 llvm::Value 
*UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 5127 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 5128 } else { 5129 Size = CGF.getTypeSize(Ty); 5130 } 5131 LValue Base = CGF.MakeAddrLValue( 5132 CGF.Builder.CreateConstArrayGEP(DependenciesArray, I), 5133 KmpDependInfoTy); 5134 // deps[i].base_addr = &<Dependences[i].second>; 5135 LValue BaseAddrLVal = CGF.EmitLValueForField( 5136 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5137 CGF.EmitStoreOfScalar( 5138 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), 5139 BaseAddrLVal); 5140 // deps[i].len = sizeof(<Dependences[i].second>); 5141 LValue LenLVal = CGF.EmitLValueForField( 5142 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 5143 CGF.EmitStoreOfScalar(Size, LenLVal); 5144 // deps[i].flags = <Dependences[i].first>; 5145 RTLDependenceKindTy DepKind; 5146 switch (Data.Dependences[I].first) { 5147 case OMPC_DEPEND_in: 5148 DepKind = DepIn; 5149 break; 5150 // Out and InOut dependencies must use the same code. 5151 case OMPC_DEPEND_out: 5152 case OMPC_DEPEND_inout: 5153 DepKind = DepInOut; 5154 break; 5155 case OMPC_DEPEND_mutexinoutset: 5156 DepKind = DepMutexInOutSet; 5157 break; 5158 case OMPC_DEPEND_source: 5159 case OMPC_DEPEND_sink: 5160 case OMPC_DEPEND_unknown: 5161 llvm_unreachable("Unknown task dependence type"); 5162 } 5163 LValue FlagsLVal = CGF.EmitLValueForField( 5164 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5165 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5166 FlagsLVal); 5167 } 5168 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5169 CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy); 5170 } 5171 5172 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5173 // libcall. 
5174 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5175 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5176 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5177 // list is not empty 5178 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5179 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5180 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5181 llvm::Value *DepTaskArgs[7]; 5182 if (NumDependencies) { 5183 DepTaskArgs[0] = UpLoc; 5184 DepTaskArgs[1] = ThreadID; 5185 DepTaskArgs[2] = NewTask; 5186 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); 5187 DepTaskArgs[4] = DependenciesArray.getPointer(); 5188 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5189 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5190 } 5191 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies, 5192 &TaskArgs, 5193 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5194 if (!Data.Tied) { 5195 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5196 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5197 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5198 } 5199 if (NumDependencies) { 5200 CGF.EmitRuntimeCall( 5201 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); 5202 } else { 5203 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 5204 TaskArgs); 5205 } 5206 // Check if parent region is untied and build return for untied task; 5207 if (auto *Region = 5208 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5209 Region->emitUntiedSwitch(CGF); 5210 }; 5211 5212 llvm::Value *DepWaitTaskArgs[6]; 5213 if (NumDependencies) { 5214 DepWaitTaskArgs[0] = UpLoc; 5215 DepWaitTaskArgs[1] = ThreadID; 5216 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); 5217 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5218 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5219 
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Code generator for the branch taken when the 'if' clause evaluates to
  // false (it is passed as the else-generator to emitOMPIfClause below).
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // The begin/complete calls are attached as the enter/exit action of the
    // region that calls the task entry.
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  // With an 'if' clause both branches are emitted; without one only the
  // "then" code is generated.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

/// Emits the call to __kmpc_taskloop for the loop directive \p D.
///
/// The task is first set up through emitTaskInit(). The initializers of the
/// lower bound, upper bound and stride variables of \p D are then emitted into
/// the corresponding fields of the task descriptor, the reductions field is
/// set from \c Data.Reductions (or null-initialized when it is absent), and
/// finally the runtime entry point is called. Nothing is emitted when \p CGF
/// has no insertion point.
///
/// \param CGF Function in which the code is emitted.
/// \param Loc Location used for the thread id and ident_t arguments.
/// \param D Loop directive providing the bound and stride variables.
/// \param TaskFunction Task function, forwarded to emitTaskInit().
/// \param SharedsTy Type of \p Shareds, forwarded to emitTaskInit().
/// \param Shareds Address of the shareds, forwarded to emitTaskInit().
/// \param IfCond Expression of the 'if' clause, or null if there is none.
/// \param Data Additional task data (schedule, reductions, ...).
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // if_val: the 'if' clause condition converted to int, or the constant 1
  // when the directive has no 'if' clause.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Emit the initializer of the lower bound variable directly into the
  // lower-bound field of the task descriptor.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Same for the upper bound.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Same for the stride.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // Values passed as the 'sched' argument of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  // Arguments in the order of the __kmpc_taskloop prototype quoted above.
  // Bounds are passed by pointer, the stride is loaded and passed by value.
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // sched: NoSchedule when Data.Schedule holds no value; otherwise the
      // flag stored alongside the value selects NumTasks or Grainsize.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      // Schedule value widened to 64 bits (unsigned), or 0 when absent.
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      // task_dup: the task duplication function if one was produced by
      // emitTaskInit(), otherwise a null pointer.
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
5352 static void EmitOMPAggregateReduction( 5353 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5354 const VarDecl *RHSVar, 5355 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5356 const Expr *, const Expr *)> &RedOpGen, 5357 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5358 const Expr *UpExpr = nullptr) { 5359 // Perform element-by-element initialization. 5360 QualType ElementTy; 5361 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5362 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5363 5364 // Drill down to the base element type on both arrays. 5365 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5366 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5367 5368 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5369 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5370 // Cast from pointer to array type to pointer to single element. 5371 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5372 // The basic structure here is a while-do loop. 5373 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5374 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5375 llvm::Value *IsEmpty = 5376 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5377 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5378 5379 // Enter the loop body, making that address the current address. 
5380 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5381 CGF.EmitBlock(BodyBB); 5382 5383 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5384 5385 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5386 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5387 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5388 Address RHSElementCurrent = 5389 Address(RHSElementPHI, 5390 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5391 5392 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5393 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5394 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5395 Address LHSElementCurrent = 5396 Address(LHSElementPHI, 5397 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5398 5399 // Emit copy. 5400 CodeGenFunction::OMPPrivateScope Scope(CGF); 5401 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5402 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5403 Scope.Privatize(); 5404 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5405 Scope.ForceCleanup(); 5406 5407 // Shift the address forward by one element. 5408 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5409 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5410 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5411 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5412 // Check whether we've reached the end. 5413 llvm::Value *Done = 5414 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5415 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5416 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5417 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5418 5419 // Done. 5420 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5421 } 5422 5423 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5424 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5425 /// UDR combiner function. 5426 static void emitReductionCombiner(CodeGenFunction &CGF, 5427 const Expr *ReductionOp) { 5428 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5429 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5430 if (const auto *DRE = 5431 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5432 if (const auto *DRD = 5433 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5434 std::pair<llvm::Function *, llvm::Function *> Reduction = 5435 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5436 RValue Func = RValue::get(Reduction.first); 5437 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5438 CGF.EmitIgnoredExpr(ReductionOp); 5439 return; 5440 } 5441 CGF.EmitIgnoredExpr(ReductionOp); 5442 } 5443 5444 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5445 CodeGenModule &CGM, SourceLocation Loc, llvm::Type *ArgsType, 5446 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, 5447 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) { 5448 ASTContext &C = CGM.getContext(); 5449 5450 // void reduction_func(void *LHSArg, void *RHSArg); 5451 FunctionArgList Args; 5452 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5453 ImplicitParamDecl::Other); 5454 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5455 ImplicitParamDecl::Other); 5456 Args.push_back(&LHSArg); 5457 Args.push_back(&RHSArg); 5458 const auto &CGFI = 5459 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5460 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5461 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5462 llvm::GlobalValue::InternalLinkage, Name, 5463 &CGM.getModule()); 5464 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5465 
Fn->setDoesNotRecurse(); 5466 CodeGenFunction CGF(CGM); 5467 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5468 5469 // Dst = (void*[n])(LHSArg); 5470 // Src = (void*[n])(RHSArg); 5471 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5472 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5473 ArgsType), CGF.getPointerAlign()); 5474 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5475 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5476 ArgsType), CGF.getPointerAlign()); 5477 5478 // ... 5479 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5480 // ... 5481 CodeGenFunction::OMPPrivateScope Scope(CGF); 5482 auto IPriv = Privates.begin(); 5483 unsigned Idx = 0; 5484 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5485 const auto *RHSVar = 5486 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5487 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5488 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5489 }); 5490 const auto *LHSVar = 5491 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5492 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5493 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5494 }); 5495 QualType PrivTy = (*IPriv)->getType(); 5496 if (PrivTy->isVariablyModifiedType()) { 5497 // Get array size and emit VLA type. 
5498 ++Idx; 5499 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5500 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5501 const VariableArrayType *VLA = 5502 CGF.getContext().getAsVariableArrayType(PrivTy); 5503 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5504 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5505 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5506 CGF.EmitVariablyModifiedType(PrivTy); 5507 } 5508 } 5509 Scope.Privatize(); 5510 IPriv = Privates.begin(); 5511 auto ILHS = LHSExprs.begin(); 5512 auto IRHS = RHSExprs.begin(); 5513 for (const Expr *E : ReductionOps) { 5514 if ((*IPriv)->getType()->isArrayType()) { 5515 // Emit reduction for array section. 5516 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5517 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5518 EmitOMPAggregateReduction( 5519 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5520 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5521 emitReductionCombiner(CGF, E); 5522 }); 5523 } else { 5524 // Emit reduction for array subscript or single variable. 5525 emitReductionCombiner(CGF, E); 5526 } 5527 ++IPriv; 5528 ++ILHS; 5529 ++IRHS; 5530 } 5531 Scope.ForceCleanup(); 5532 CGF.FinishFunction(); 5533 return Fn; 5534 } 5535 5536 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5537 const Expr *ReductionOp, 5538 const Expr *PrivateRef, 5539 const DeclRefExpr *LHS, 5540 const DeclRefExpr *RHS) { 5541 if (PrivateRef->getType()->isArrayType()) { 5542 // Emit reduction for array section. 
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

/// Emits the code performing the reduction described by \p Privates,
/// \p LHSExprs, \p RHSExprs and \p ReductionOps (parallel arrays, one entry
/// per reduction item).
///
/// When \c Options.SimpleReduction is set only the combiners are emitted.
/// Otherwise a list of pointers to the RHS items is built, a reduce function
/// is generated, and __kmpc_reduce (or __kmpc_reduce_nowait when
/// \c Options.WithNowait is set) is called; its result is dispatched through
/// a switch with a plain-combiner case (1) and an atomic/critical case (2).
/// Nothing is emitted when \p CGF has no insertion point.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  // Simple reduction: emit the combiners in order and return without any
  // runtime calls.
  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in RedList; it advances by two for items that also store
  // an array size.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      // The element count is stored in the list encoded as a pointer value.
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Generator for the body of case 1: the plain combiners, in order. It does
  // not call Action.Enter(); the action below supplies no enter callee.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // Only an exit call is registered: __kmpc_end_reduce{_nowait}.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Generator for the body of case 2. Each reduction op of the form
  // "x = <update>" is emitted through EmitOMPAtomicSimpleUpdateExpr; any
  // other op is emitted inside a critical region.
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // NOTE: the pointer named BO in the condition below shadows the
      // BinaryOperatorKind BO above only inside this if statement.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          // The callback below computes the updated value from a given value
          // of X: VD is remapped to a temporary holding that value and the
          // update expression is evaluated against it.
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  // AtomicRCG is invoked in both branches; the call sits inside the first
  // branch so that it runs while the Action declared there is still in scope.
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  // Ask the helper for the base declaration of the reference expression; if it
  // yields none, Ref itself is treated as a DeclRefExpr that names a VarDecl.
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  // Local variables and parameters contribute their plain identifier, any
  // other declaration contributes its mangled name.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  // The raw encoding of the canonical declaration's begin location is appended
  // after the "_" separator.
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return Out.str();
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
///   %0 = bitcast void* %arg to <type>*
///   store <type> <init>, <type>* %0
///   ret void
/// }
/// \endcode
///
/// \param N selects the reduction item inside \p RCG that the function is
/// emitted for.
/// \returns the newly created internal-linkage function.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // The function takes a single 'void *' parameter and returns void.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Load the pointer passed as the argument; it addresses the private item.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If the initializer comes from a 'declare reduction' construct, load the
  // address of the original reduction item (required by that initializer) from
  // the artificial threadprivate variable named with the "reduction" prefix.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // Otherwise a null pointer stands in for the shared item.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
///   %lhs = bitcast void* %arg0 to <type>*
///   %rhs = bitcast void* %arg1 to <type>*
///   %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
///   store <type> %2, <type>* %lhs
///   ret void
/// }
/// \endcode
///
/// \param N selects the reduction item inside \p RCG.
/// \param LHS, RHS must be DeclRefExprs naming VarDecls; those variables are
/// remapped to the first and second function argument respectively.
/// \returns the newly created internal-linkage function.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  // The function takes two 'void *' parameters and returns void.
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
///   %0 = bitcast void* %arg to <type>*
///   <destroy>(<type>* %0)
///   ret void
/// }
/// \endcode
///
/// \param N selects the reduction item inside \p RCG.
/// \returns the newly created internal-linkage function, or nullptr when
/// RCG.needCleanups(N) reports that no cleanups are needed.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  // The function takes a single 'void *' parameter and returns void.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Load the pointer passed as the argument; it addresses the private item.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction();
  return Fn;
}

// Fills a temporary array of kmp_task_red_input_t, one element per entry of
// Data.ReductionVars, and passes it to __kmpc_task_reduction_init. Returns the
// result of that runtime call, or nullptr when there is no insert point or no
// reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = size in chars, converted to size_t;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini; (null when no finalizer was emitted)
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}

// Stores the values that the generated init/comb/fini functions read back from
// artificial threadprivate variables: the item size (when non-constant) and
// the address of the original item (when a custom initializer is used). The
// variable names are produced by generateUniqueName with the same prefixes the
// readers use.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
  // Store address of the original reduction item if custom initializer is used.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().VoidPtrTy,
        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
        SharedAddr, /*IsVolatile=*/false);
  }
}

// Calls __kmpc_task_reduction_get_th_data for the given shared item and wraps
// the returned pointer in an Address that reuses SharedLVal's alignment.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      ReductionsPtr,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
                                                      CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
      SharedLVal.getAlignment());
}

// Emits a call to __kmpc_omp_taskwait and, when inside an OpenMP region, lets
// that region emit its untied-task switch. Does nothing without an insert
// point.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Ignore return result until untied tasks are supported.
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

// Emits the body supplied by CodeGen within an InlinedOpenMPRegionRAII scope
// for the directive kind InnerKind. Does nothing without an insert point.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
// Values passed as the 'cncl_kind' argument of __kmpc_cancel and
// __kmpc_cancellationpoint (see getCancellationKind and its callers).
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

// Maps the directive kind named by a cancel/cancellation point construct to
// the corresponding RTCancelKind. Anything other than parallel, for and
// sections is asserted to be taskgroup.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

// Emits a call to __kmpc_cancellationpoint followed by a conditional branch to
// the cancel destination of the enclosing OpenMP region. Nothing is emitted
// when there is no insert point, when the current captured-statement info is
// not an OpenMP region, or when the region has no cancel and CancelRegion is
// not taskgroup.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The result of the runtime call is tested below to decide whether to
      // leave the construct.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

// Emits a call to __kmpc_cancel followed by a conditional branch to the cancel
// destination of the enclosing OpenMP region. When IfCond is given, the call
// is emitted through emitOMPIfClause with an empty 'else' generator. Nothing
// is emitted when there is no insert point or when the current
// captured-statement info is not an OpenMP region.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The result of the runtime call is tested below to decide whether to
      // leave the construct.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

// Asserts that a parent name was supplied and forwards all arguments to
// emitTargetOutlinedFunctionHelper.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

// Generates the outlined function for the 'target' captured statement of D
// into OutlinedFn and, when IsOffloadEntry is set, produces OutlinedFnID and
// registers the entry with OffloadEntriesInfoManager.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. When
  // emitting code for the device we also give the outlined function weak-any
  // linkage and clear its dso_local flag, because these functions will be entry
  // points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // On the host the ID is a dedicated constant i8 global, zero-initialized,
    // named after the entry function with a "region_id" component.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// Discard all CompoundStmts intervening between two constructs: while \p Body
/// is a CompoundStmt, descend into its first statement. A null \p Body is
/// returned unchanged.
static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
  while (const auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
    Body = CS->body_front();

  return Body;
}

/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// Note: \p OMPRuntime is not referenced in the body.
static llvm::Value *
emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
                               CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
                                              "teams directive expected to be "
                                              "emitted only for the host!");

  CGBuilderTy &Bld = CGF.Builder;

  // If the target directive is combined with a teams directive:
  //   Return the value in the num_teams clause, if any.
  //   Otherwise, return 0 to denote the runtime default.
  if (isOpenMPTeamsDirective(D.getDirectiveKind())) {
    if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(),
                                                 /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
                               /*IsSigned=*/true);
    }

    // The default value is 0.
    return Bld.getInt32(0);
  }

  // If the target directive is combined with a parallel directive but not a
  // teams directive, start one team.
  if (isOpenMPParallelDirective(D.getDirectiveKind()))
    return Bld.getInt32(1);

  // If the current target region has a teams region enclosed, we need to get
  // the number of teams to pass to the runtime function call. This is done
  // by generating the expression in an inlined region. This is required because
  // the expression is captured in the enclosing target environment when the
  // teams directive is not combined with target.

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
          ignoreCompoundStmts(CS.getCapturedStmt()))) {
    if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
      if (const auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
        return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
                                 /*IsSigned=*/true);
      }

      // If we have an enclosed teams directive but no num_teams clause we use
      // the default value 0.
      return Bld.getInt32(0);
    }
  }

  // No teams associated with the directive.
  return nullptr;
}

/// Emit the number of threads for a target directive.
/// Inspect the thread_limit clause associated with a teams construct combined
/// or closely nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// Note: \p OMPRuntime is not referenced in the body.
static llvm::Value *
emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
                                 CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
                                              "teams directive expected to be "
                                              "emitted only for the host!");

  CGBuilderTy &Bld = CGF.Builder;

  //
  // If the target directive is combined with a teams directive:
  //   Return the value in the thread_limit clause, if any.
  //
  // If the target directive is combined with a parallel directive:
  //   Return the value in the num_threads clause, if any.
  //
  // If both clauses are set, select the minimum of the two.
  //
  // If neither teams nor parallel combined directives set the number of
  // threads in a team, return 0 to denote the runtime default.
  //
  // If this is not a teams directive return nullptr.

  if (isOpenMPTeamsDirective(D.getDirectiveKind()) ||
      isOpenMPParallelDirective(D.getDirectiveKind())) {
    llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0);
    llvm::Value *NumThreadsVal = nullptr;
    llvm::Value *ThreadLimitVal = nullptr;

    if (const auto *ThreadLimitClause =
            D.getSingleClause<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      llvm::Value *ThreadLimit =
          CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(),
                             /*IgnoreResultAssign*/ true);
      ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty,
                                         /*IsSigned=*/true);
    }

    if (const auto *NumThreadsClause =
            D.getSingleClause<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      llvm::Value *NumThreads =
          CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                             /*IgnoreResultAssign*/ true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true);
    }

    // Select the lesser of thread_limit and num_threads (signed comparison);
    // with only num_threads present, use it directly.
    if (NumThreadsVal)
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;

    // Set default value passed to the runtime if either teams or a target
    // parallel type directive is found but no clause is specified.
    if (!ThreadLimitVal)
      ThreadLimitVal = DefaultThreadLimitVal;

    return ThreadLimitVal;
  }

  // If the current target region has a teams region enclosed, we need to get
  // the thread limit to pass to the runtime function call. This is done
  // by generating the expression in an inlined region. This is required because
  // the expression is captured in the enclosing target environment when the
  // teams directive is not combined with target.

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
          ignoreCompoundStmts(CS.getCapturedStmt()))) {
    if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
      if (const auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
        return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
                                         /*IsSigned=*/true);
      }

      // If we have an enclosed teams directive but no thread_limit clause we
      // use the default value 0.
      return CGF.Builder.getInt32(0);
    }
  }

  // No teams associated with the directive.
  return nullptr;
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
6625 OMP_MAP_DELETE = 0x08, 6626 /// The element being mapped is a pointer-pointee pair; both the 6627 /// pointer and the pointee should be mapped. 6628 OMP_MAP_PTR_AND_OBJ = 0x10, 6629 /// This flags signals that the base address of an entry should be 6630 /// passed to the target kernel as an argument. 6631 OMP_MAP_TARGET_PARAM = 0x20, 6632 /// Signal that the runtime library has to return the device pointer 6633 /// in the current position for the data being mapped. Used when we have the 6634 /// use_device_ptr clause. 6635 OMP_MAP_RETURN_PARAM = 0x40, 6636 /// This flag signals that the reference being passed is a pointer to 6637 /// private data. 6638 OMP_MAP_PRIVATE = 0x80, 6639 /// Pass the element to the device by value. 6640 OMP_MAP_LITERAL = 0x100, 6641 /// Implicit map 6642 OMP_MAP_IMPLICIT = 0x200, 6643 /// The 16 MSBs of the flags indicate whether the entry is member of some 6644 /// struct/class. 6645 OMP_MAP_MEMBER_OF = 0xffff000000000000, 6646 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 6647 }; 6648 6649 /// Class that associates information with a base pointer to be passed to the 6650 /// runtime library. 6651 class BasePointerInfo { 6652 /// The base pointer. 6653 llvm::Value *Ptr = nullptr; 6654 /// The base declaration that refers to this device pointer, or null if 6655 /// there is none. 
6656 const ValueDecl *DevPtrDecl = nullptr; 6657 6658 public: 6659 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 6660 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 6661 llvm::Value *operator*() const { return Ptr; } 6662 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 6663 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 6664 }; 6665 6666 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 6667 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 6668 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 6669 6670 /// Map between a struct and the its lowest & highest elements which have been 6671 /// mapped. 6672 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 6673 /// HE(FieldIndex, Pointer)} 6674 struct StructRangeInfoTy { 6675 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 6676 0, Address::invalid()}; 6677 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 6678 0, Address::invalid()}; 6679 Address Base = Address::invalid(); 6680 }; 6681 6682 private: 6683 /// Kind that defines how a device pointer has to be returned. 
6684 struct MapInfo { 6685 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 6686 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 6687 ArrayRef<OpenMPMapModifierKind> MapModifiers; 6688 bool ReturnDevicePointer = false; 6689 bool IsImplicit = false; 6690 6691 MapInfo() = default; 6692 MapInfo( 6693 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 6694 OpenMPMapClauseKind MapType, 6695 ArrayRef<OpenMPMapModifierKind> MapModifiers, 6696 bool ReturnDevicePointer, bool IsImplicit) 6697 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 6698 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} 6699 }; 6700 6701 /// If use_device_ptr is used on a pointer which is a struct member and there 6702 /// is no map information about it, then emission of that entry is deferred 6703 /// until the whole struct has been processed. 6704 struct DeferredDevicePtrEntryTy { 6705 const Expr *IE = nullptr; 6706 const ValueDecl *VD = nullptr; 6707 6708 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD) 6709 : IE(IE), VD(VD) {} 6710 }; 6711 6712 /// Directive from where the map clauses were extracted. 6713 const OMPExecutableDirective &CurDir; 6714 6715 /// Function the directive is being generated for. 6716 CodeGenFunction &CGF; 6717 6718 /// Set of all first private variables in the current directive. 6719 llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls; 6720 6721 /// Map between device pointer declarations and their expression components. 6722 /// The key value for declarations in 'this' is null. 6723 llvm::DenseMap< 6724 const ValueDecl *, 6725 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 6726 DevPointersMap; 6727 6728 llvm::Value *getExprTypeSize(const Expr *E) const { 6729 QualType ExprTy = E->getType().getCanonicalType(); 6730 6731 // Reference types are ignored for mapping purposes. 
6732 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 6733 ExprTy = RefTy->getPointeeType().getCanonicalType(); 6734 6735 // Given that an array section is considered a built-in type, we need to 6736 // do the calculation based on the length of the section instead of relying 6737 // on CGF.getTypeSize(E->getType()). 6738 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 6739 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 6740 OAE->getBase()->IgnoreParenImpCasts()) 6741 .getCanonicalType(); 6742 6743 // If there is no length associated with the expression, that means we 6744 // are using the whole length of the base. 6745 if (!OAE->getLength() && OAE->getColonLoc().isValid()) 6746 return CGF.getTypeSize(BaseTy); 6747 6748 llvm::Value *ElemSize; 6749 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 6750 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 6751 } else { 6752 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 6753 assert(ATy && "Expecting array type if not a pointer type."); 6754 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 6755 } 6756 6757 // If we don't have a length at this point, that is because we have an 6758 // array section with a single element. 6759 if (!OAE->getLength()) 6760 return ElemSize; 6761 6762 llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); 6763 LengthVal = 6764 CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); 6765 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 6766 } 6767 return CGF.getTypeSize(ExprTy); 6768 } 6769 6770 /// Return the corresponding bits for a given map clause modifier. Add 6771 /// a flag marking the map as a pointer if requested. Add a flag marking the 6772 /// map as the first one of a series of maps that relate to the same map 6773 /// expression. 
6774 OpenMPOffloadMappingFlags getMapTypeBits( 6775 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 6776 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { 6777 OpenMPOffloadMappingFlags Bits = 6778 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 6779 switch (MapType) { 6780 case OMPC_MAP_alloc: 6781 case OMPC_MAP_release: 6782 // alloc and release is the default behavior in the runtime library, i.e. 6783 // if we don't pass any bits alloc/release that is what the runtime is 6784 // going to do. Therefore, we don't need to signal anything for these two 6785 // type modifiers. 6786 break; 6787 case OMPC_MAP_to: 6788 Bits |= OMP_MAP_TO; 6789 break; 6790 case OMPC_MAP_from: 6791 Bits |= OMP_MAP_FROM; 6792 break; 6793 case OMPC_MAP_tofrom: 6794 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 6795 break; 6796 case OMPC_MAP_delete: 6797 Bits |= OMP_MAP_DELETE; 6798 break; 6799 case OMPC_MAP_unknown: 6800 llvm_unreachable("Unexpected map type!"); 6801 } 6802 if (AddPtrFlag) 6803 Bits |= OMP_MAP_PTR_AND_OBJ; 6804 if (AddIsTargetParamFlag) 6805 Bits |= OMP_MAP_TARGET_PARAM; 6806 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 6807 != MapModifiers.end()) 6808 Bits |= OMP_MAP_ALWAYS; 6809 return Bits; 6810 } 6811 6812 /// Return true if the provided expression is a final array section. A 6813 /// final array section, is one whose length can't be proved to be one. 6814 bool isFinalArraySectionExpression(const Expr *E) const { 6815 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 6816 6817 // It is not an array section and therefore not a unity-size one. 6818 if (!OASE) 6819 return false; 6820 6821 // An array section with no colon always refer to a single element. 6822 if (OASE->getColonLoc().isInvalid()) 6823 return false; 6824 6825 const Expr *Length = OASE->getLength(); 6826 6827 // If we don't have a length we have to check if the array has size 1 6828 // for this dimension. 
Also, we should always expect a length if the 6829 // base type is pointer. 6830 if (!Length) { 6831 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 6832 OASE->getBase()->IgnoreParenImpCasts()) 6833 .getCanonicalType(); 6834 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 6835 return ATy->getSize().getSExtValue() != 1; 6836 // If we don't have a constant dimension length, we have to consider 6837 // the current section as having any size, so it is not necessarily 6838 // unitary. If it happen to be unity size, that's user fault. 6839 return true; 6840 } 6841 6842 // Check if the length evaluates to 1. 6843 Expr::EvalResult Result; 6844 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 6845 return true; // Can have more that size 1. 6846 6847 llvm::APSInt ConstLength = Result.Val.getInt(); 6848 return ConstLength.getSExtValue() != 1; 6849 } 6850 6851 /// Generate the base pointers, section pointers, sizes and map type 6852 /// bits for the provided map type, map modifier, and expression components. 6853 /// \a IsFirstComponent should be set to true if the provided set of 6854 /// components is the first associated with a capture. 6855 void generateInfoForComponentList( 6856 OpenMPMapClauseKind MapType, 6857 ArrayRef<OpenMPMapModifierKind> MapModifiers, 6858 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 6859 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 6860 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 6861 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, 6862 bool IsImplicit, 6863 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 6864 OverlappedElements = llvm::None) const { 6865 // The following summarizes what has to be generated for each map and the 6866 // types below. The generated information is expressed in this order: 6867 // base pointer, section pointer, size, flags 6868 // (to add to the ones that come from the map type and modifier). 
6869 // 6870 // double d; 6871 // int i[100]; 6872 // float *p; 6873 // 6874 // struct S1 { 6875 // int i; 6876 // float f[50]; 6877 // } 6878 // struct S2 { 6879 // int i; 6880 // float f[50]; 6881 // S1 s; 6882 // double *p; 6883 // struct S2 *ps; 6884 // } 6885 // S2 s; 6886 // S2 *ps; 6887 // 6888 // map(d) 6889 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 6890 // 6891 // map(i) 6892 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 6893 // 6894 // map(i[1:23]) 6895 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 6896 // 6897 // map(p) 6898 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 6899 // 6900 // map(p[1:24]) 6901 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 6902 // 6903 // map(s) 6904 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 6905 // 6906 // map(s.i) 6907 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 6908 // 6909 // map(s.s.f) 6910 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 6911 // 6912 // map(s.p) 6913 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 6914 // 6915 // map(to: s.p[:22]) 6916 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 6917 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 6918 // &(s.p), &(s.p[0]), 22*sizeof(double), 6919 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 6920 // (*) alloc space for struct members, only this is a target parameter 6921 // (**) map the pointer (nothing to be mapped in this example) (the compiler 6922 // optimizes this entry out, same in the examples below) 6923 // (***) map the pointee (map: to) 6924 // 6925 // map(s.ps) 6926 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 6927 // 6928 // map(from: s.ps->s.i) 6929 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 6930 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 6931 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 6932 // 6933 // map(to: s.ps->ps) 6934 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 6935 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 6936 // &(s.ps), 
&(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 6937 // 6938 // map(s.ps->ps->ps) 6939 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 6940 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 6941 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 6942 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 6943 // 6944 // map(to: s.ps->ps->s.f[:22]) 6945 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 6946 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 6947 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 6948 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 6949 // 6950 // map(ps) 6951 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 6952 // 6953 // map(ps->i) 6954 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 6955 // 6956 // map(ps->s.f) 6957 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 6958 // 6959 // map(from: ps->p) 6960 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 6961 // 6962 // map(to: ps->p[:22]) 6963 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 6964 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 6965 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 6966 // 6967 // map(ps->ps) 6968 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 6969 // 6970 // map(from: ps->ps->s.i) 6971 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 6972 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 6973 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 6974 // 6975 // map(from: ps->ps->ps) 6976 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 6977 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 6978 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 6979 // 6980 // map(ps->ps->ps->ps) 6981 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 6982 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 6983 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 6984 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | 
TO | FROM 6985 // 6986 // map(to: ps->ps->ps->s.f[:22]) 6987 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 6988 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 6989 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 6990 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 6991 // 6992 // map(to: s.f[:22]) map(from: s.p[:33]) 6993 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 6994 // sizeof(double*) (**), TARGET_PARAM 6995 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 6996 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 6997 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 6998 // (*) allocate contiguous space needed to fit all mapped members even if 6999 // we allocate space for members not mapped (in this example, 7000 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7001 // them as well because they fall between &s.f[0] and &s.p) 7002 // 7003 // map(from: s.f[:22]) map(to: ps->p[:33]) 7004 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7005 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7006 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7007 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7008 // (*) the struct this entry pertains to is the 2nd element in the list of 7009 // arguments, hence MEMBER_OF(2) 7010 // 7011 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7012 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7013 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7014 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7015 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7016 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7017 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7018 // (*) the struct this entry pertains to is the 4th element in the list 7019 // of arguments, hence MEMBER_OF(4) 7020 7021 // Track if the map information being generated is the first for a 
capture. 7022 bool IsCaptureFirstInfo = IsFirstComponentList; 7023 bool IsLink = false; // Is this variable a "declare target link"? 7024 7025 // Scan the components from the base to the complete expression. 7026 auto CI = Components.rbegin(); 7027 auto CE = Components.rend(); 7028 auto I = CI; 7029 7030 // Track if the map information being generated is the first for a list of 7031 // components. 7032 bool IsExpressionFirstInfo = true; 7033 Address BP = Address::invalid(); 7034 const Expr *AssocExpr = I->getAssociatedExpression(); 7035 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7036 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7037 7038 if (isa<MemberExpr>(AssocExpr)) { 7039 // The base is the 'this' pointer. The content of the pointer is going 7040 // to be the base of the field being mapped. 7041 BP = CGF.LoadCXXThisAddress(); 7042 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7043 (OASE && 7044 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7045 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); 7046 } else { 7047 // The base is the reference to the variable. 7048 // BP = &Var. 7049 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); 7050 if (const auto *VD = 7051 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7052 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7053 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) 7054 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) { 7055 IsLink = true; 7056 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); 7057 } 7058 } 7059 7060 // If the variable is a pointer and is being dereferenced (i.e. is not 7061 // the last component), the base has to be the pointer itself, not its 7062 // reference. References are ignored for mapping purposes. 
7063 QualType Ty = 7064 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7065 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7066 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7067 7068 // We do not need to generate individual map information for the 7069 // pointer, it can be associated with the combined storage. 7070 ++I; 7071 } 7072 } 7073 7074 // Track whether a component of the list should be marked as MEMBER_OF some 7075 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7076 // in a component list should be marked as MEMBER_OF, all subsequent entries 7077 // do not belong to the base struct. E.g. 7078 // struct S2 s; 7079 // s.ps->ps->ps->f[:] 7080 // (1) (2) (3) (4) 7081 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7082 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7083 // is the pointee of ps(2) which is not member of struct s, so it should not 7084 // be marked as such (it is still PTR_AND_OBJ). 7085 // The variable is initialized to false so that PTR_AND_OBJ entries which 7086 // are not struct members are not considered (e.g. array of pointers to 7087 // data). 7088 bool ShouldBeMemberOf = false; 7089 7090 // Variable keeping track of whether or not we have encountered a component 7091 // in the component list which is a member expression. Useful when we have a 7092 // pointer or a final array section, in which case it is the previous 7093 // component in the list which tells us whether we have a member expression. 7094 // E.g. X.f[:] 7095 // While processing the final array section "[:]" it is "f" which tells us 7096 // whether we are dealing with a member of a declared struct. 7097 const MemberExpr *EncounteredME = nullptr; 7098 7099 for (; I != CE; ++I) { 7100 // If the current component is member of a struct (parent struct) mark it. 
7101 if (!EncounteredME) { 7102 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7103 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7104 // as MEMBER_OF the parent struct. 7105 if (EncounteredME) 7106 ShouldBeMemberOf = true; 7107 } 7108 7109 auto Next = std::next(I); 7110 7111 // We need to generate the addresses and sizes if this is the last 7112 // component, if the component is a pointer or if it is an array section 7113 // whose length can't be proved to be one. If this is a pointer, it 7114 // becomes the base address for the following components. 7115 7116 // A final array section, is one whose length can't be proved to be one. 7117 bool IsFinalArraySection = 7118 isFinalArraySectionExpression(I->getAssociatedExpression()); 7119 7120 // Get information on whether the element is a pointer. Have to do a 7121 // special treatment for array sections given that they are built-in 7122 // types. 7123 const auto *OASE = 7124 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7125 bool IsPointer = 7126 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7127 .getCanonicalType() 7128 ->isAnyPointerType()) || 7129 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7130 7131 if (Next == CE || IsPointer || IsFinalArraySection) { 7132 // If this is not the last component, we expect the pointer to be 7133 // associated with an array expression or member expression. 
7134 assert((Next == CE || 7135 isa<MemberExpr>(Next->getAssociatedExpression()) || 7136 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7137 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && 7138 "Unexpected expression"); 7139 7140 Address LB = 7141 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress(); 7142 7143 // If this component is a pointer inside the base struct then we don't 7144 // need to create any entry for it - it will be combined with the object 7145 // it is pointing to into a single PTR_AND_OBJ entry. 7146 bool IsMemberPointer = 7147 IsPointer && EncounteredME && 7148 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7149 EncounteredME); 7150 if (!OverlappedElements.empty()) { 7151 // Handle base element with the info for overlapped elements. 7152 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7153 assert(Next == CE && 7154 "Expected last element for the overlapped elements."); 7155 assert(!IsPointer && 7156 "Unexpected base element with the pointer type."); 7157 // Mark the whole struct as the struct that requires allocation on the 7158 // device. 7159 PartialStruct.LowestElem = {0, LB}; 7160 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7161 I->getAssociatedExpression()->getType()); 7162 Address HB = CGF.Builder.CreateConstGEP( 7163 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7164 CGF.VoidPtrTy), 7165 TypeSize.getQuantity() - 1); 7166 PartialStruct.HighestElem = { 7167 std::numeric_limits<decltype( 7168 PartialStruct.HighestElem.first)>::max(), 7169 HB}; 7170 PartialStruct.Base = BP; 7171 // Emit data for non-overlapped data. 7172 OpenMPOffloadMappingFlags Flags = 7173 OMP_MAP_MEMBER_OF | 7174 getMapTypeBits(MapType, MapModifiers, IsImplicit, 7175 /*AddPtrFlag=*/false, 7176 /*AddIsTargetParamFlag=*/false); 7177 LB = BP; 7178 llvm::Value *Size = nullptr; 7179 // Do bitcopy of all non-overlapped structure elements. 
7180 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7181 Component : OverlappedElements) { 7182 Address ComponentLB = Address::invalid(); 7183 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7184 Component) { 7185 if (MC.getAssociatedDeclaration()) { 7186 ComponentLB = 7187 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7188 .getAddress(); 7189 Size = CGF.Builder.CreatePtrDiff( 7190 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7191 CGF.EmitCastToVoidPtr(LB.getPointer())); 7192 break; 7193 } 7194 } 7195 BasePointers.push_back(BP.getPointer()); 7196 Pointers.push_back(LB.getPointer()); 7197 Sizes.push_back(Size); 7198 Types.push_back(Flags); 7199 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7200 } 7201 BasePointers.push_back(BP.getPointer()); 7202 Pointers.push_back(LB.getPointer()); 7203 Size = CGF.Builder.CreatePtrDiff( 7204 CGF.EmitCastToVoidPtr( 7205 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7206 CGF.EmitCastToVoidPtr(LB.getPointer())); 7207 Sizes.push_back(Size); 7208 Types.push_back(Flags); 7209 break; 7210 } 7211 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7212 if (!IsMemberPointer) { 7213 BasePointers.push_back(BP.getPointer()); 7214 Pointers.push_back(LB.getPointer()); 7215 Sizes.push_back(Size); 7216 7217 // We need to add a pointer flag for each map that comes from the 7218 // same expression except for the first one. We also need to signal 7219 // this map is the first one that relates with the current capture 7220 // (there is a set of entries for each capture). 7221 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7222 MapType, MapModifiers, IsImplicit, 7223 !IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink); 7224 7225 if (!IsExpressionFirstInfo) { 7226 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7227 // then we reset the TO/FROM/ALWAYS/DELETE flags. 
7228 if (IsPointer) 7229 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7230 OMP_MAP_DELETE); 7231 7232 if (ShouldBeMemberOf) { 7233 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7234 // should be later updated with the correct value of MEMBER_OF. 7235 Flags |= OMP_MAP_MEMBER_OF; 7236 // From now on, all subsequent PTR_AND_OBJ entries should not be 7237 // marked as MEMBER_OF. 7238 ShouldBeMemberOf = false; 7239 } 7240 } 7241 7242 Types.push_back(Flags); 7243 } 7244 7245 // If we have encountered a member expression so far, keep track of the 7246 // mapped member. If the parent is "*this", then the value declaration 7247 // is nullptr. 7248 if (EncounteredME) { 7249 const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl()); 7250 unsigned FieldIndex = FD->getFieldIndex(); 7251 7252 // Update info about the lowest and highest elements for this struct 7253 if (!PartialStruct.Base.isValid()) { 7254 PartialStruct.LowestElem = {FieldIndex, LB}; 7255 PartialStruct.HighestElem = {FieldIndex, LB}; 7256 PartialStruct.Base = BP; 7257 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7258 PartialStruct.LowestElem = {FieldIndex, LB}; 7259 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7260 PartialStruct.HighestElem = {FieldIndex, LB}; 7261 } 7262 } 7263 7264 // If we have a final array section, we are done with this expression. 7265 if (IsFinalArraySection) 7266 break; 7267 7268 // The pointer becomes the base for the next element. 7269 if (Next != CE) 7270 BP = LB; 7271 7272 IsExpressionFirstInfo = false; 7273 IsCaptureFirstInfo = false; 7274 } 7275 } 7276 } 7277 7278 /// Return the adjusted map modifiers if the declaration a capture refers to 7279 /// appears in a first-private clause. This is expected to be used only with 7280 /// directives that start with 'target'. 
7281 MappableExprsHandler::OpenMPOffloadMappingFlags 7282 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 7283 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 7284 7285 // A first private variable captured by reference will use only the 7286 // 'private ptr' and 'map to' flag. Return the right flags if the captured 7287 // declaration is known as first-private in this handler. 7288 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 7289 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 7290 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 7291 return MappableExprsHandler::OMP_MAP_ALWAYS | 7292 MappableExprsHandler::OMP_MAP_TO; 7293 return MappableExprsHandler::OMP_MAP_PRIVATE | 7294 MappableExprsHandler::OMP_MAP_TO; 7295 } 7296 return MappableExprsHandler::OMP_MAP_TO | 7297 MappableExprsHandler::OMP_MAP_FROM; 7298 } 7299 7300 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 7301 // Member of is given by the 16 MSB of the flag, so rotate by 48 bits. 7302 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 7303 << 48); 7304 } 7305 7306 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 7307 OpenMPOffloadMappingFlags MemberOfFlag) { 7308 // If the entry is PTR_AND_OBJ but has not been marked with the special 7309 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 7310 // marked as MEMBER_OF. 7311 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 7312 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 7313 return; 7314 7315 // Reset the placeholder value to prepare the flag for the assignment of the 7316 // proper MEMBER_OF value. 
7317 Flags &= ~OMP_MAP_MEMBER_OF; 7318 Flags |= MemberOfFlag; 7319 } 7320 7321 void getPlainLayout(const CXXRecordDecl *RD, 7322 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 7323 bool AsBase) const { 7324 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 7325 7326 llvm::StructType *St = 7327 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 7328 7329 unsigned NumElements = St->getNumElements(); 7330 llvm::SmallVector< 7331 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 7332 RecordLayout(NumElements); 7333 7334 // Fill bases. 7335 for (const auto &I : RD->bases()) { 7336 if (I.isVirtual()) 7337 continue; 7338 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7339 // Ignore empty bases. 7340 if (Base->isEmpty() || CGF.getContext() 7341 .getASTRecordLayout(Base) 7342 .getNonVirtualSize() 7343 .isZero()) 7344 continue; 7345 7346 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 7347 RecordLayout[FieldIndex] = Base; 7348 } 7349 // Fill in virtual bases. 7350 for (const auto &I : RD->vbases()) { 7351 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7352 // Ignore empty bases. 7353 if (Base->isEmpty()) 7354 continue; 7355 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 7356 if (RecordLayout[FieldIndex]) 7357 continue; 7358 RecordLayout[FieldIndex] = Base; 7359 } 7360 // Fill in all the fields. 7361 assert(!RD->isUnion() && "Unexpected union."); 7362 for (const auto *Field : RD->fields()) { 7363 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 7364 // will fill in later.) 
7365 if (!Field->isBitField()) { 7366 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 7367 RecordLayout[FieldIndex] = Field; 7368 } 7369 } 7370 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 7371 &Data : RecordLayout) { 7372 if (Data.isNull()) 7373 continue; 7374 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 7375 getPlainLayout(Base, Layout, /*AsBase=*/true); 7376 else 7377 Layout.push_back(Data.get<const FieldDecl *>()); 7378 } 7379 } 7380 7381 public: 7382 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 7383 : CurDir(Dir), CGF(CGF) { 7384 // Extract firstprivate clause information. 7385 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 7386 for (const auto *D : C->varlists()) 7387 FirstPrivateDecls.insert( 7388 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); 7389 // Extract device pointer clause information. 7390 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 7391 for (auto L : C->component_lists()) 7392 DevPointersMap[L.first].push_back(L.second); 7393 } 7394 7395 /// Generate code for the combined entry if we have a partially mapped struct 7396 /// and take care of the mapping flags of the arguments corresponding to 7397 /// individual struct members. 
7398 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 7399 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7400 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 7401 const StructRangeInfoTy &PartialStruct) const { 7402 // Base is the base of the struct 7403 BasePointers.push_back(PartialStruct.Base.getPointer()); 7404 // Pointer is the address of the lowest element 7405 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 7406 Pointers.push_back(LB); 7407 // Size is (addr of {highest+1} element) - (addr of lowest element) 7408 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 7409 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 7410 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 7411 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 7412 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 7413 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.SizeTy, 7414 /*isSinged=*/false); 7415 Sizes.push_back(Size); 7416 // Map type is always TARGET_PARAM 7417 Types.push_back(OMP_MAP_TARGET_PARAM); 7418 // Remove TARGET_PARAM flag from the first element 7419 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 7420 7421 // All other current entries will be MEMBER_OF the combined entry 7422 // (except for PTR_AND_OBJ entries which do not have a placeholder value 7423 // 0xFFFF in the MEMBER_OF field). 7424 OpenMPOffloadMappingFlags MemberOfFlag = 7425 getMemberOfFlag(BasePointers.size() - 1); 7426 for (auto &M : CurTypes) 7427 setCorrectMemberOfFlag(M, MemberOfFlag); 7428 } 7429 7430 /// Generate all the base pointers, section pointers, sizes and map 7431 /// types for the extracted mappable expressions. Also, for each item that 7432 /// relates with a device pointer, a pair of the relevant declaration and 7433 /// index where it occurs is appended to the device pointers info array. 
void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                     MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                     MapFlagsArrayTy &Types) const {
  // We have to process the component lists that relate with the same
  // declaration in a single chunk so that we can generate the map flags
  // correctly. Therefore, we organize all lists in a map.
  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

  // Helper function to fill the information map for the different supported
  // clauses. Entries are keyed by the canonical declaration; a null
  // declaration is kept as a null key.
  auto &&InfoGen = [&Info](
      const ValueDecl *D,
      OMPClauseMappableExprCommon::MappableExprComponentListRef L,
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      bool ReturnDevicePointer, bool IsImplicit) {
    const ValueDecl *VD =
        D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
    Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                          IsImplicit);
  };

  // Collect the component lists of the 'map', 'to' and 'from' clauses. The
  // 'to' and 'from' clauses are recorded with the corresponding map type and
  // no map modifiers.
  // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
  for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
    for (const auto &L : C->component_lists()) {
      InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
          /*ReturnDevicePointer=*/false, C->isImplicit());
    }
  for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
    for (const auto &L : C->component_lists()) {
      InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
          /*ReturnDevicePointer=*/false, C->isImplicit());
    }
  for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
    for (const auto &L : C->component_lists()) {
      InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
          /*ReturnDevicePointer=*/false, C->isImplicit());
    }

  // Look at the use_device_ptr clause information and mark the existing map
  // entries as such. If there is no map information for an entry in the
  // use_device_ptr list, we create one with map type 'alloc' and zero size
  // section. It is the user's fault if that was not mapped before. If there
  // is no map information and the pointer is a struct member, then we defer
  // the emission of that entry until the whole struct has been processed.
  llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
      DeferredInfo;

  // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
  for (const auto *C :
       this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) {
    for (const auto &L : C->component_lists()) {
      assert(!L.second.empty() && "Not expecting empty list of components!");
      const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
      VD = cast<ValueDecl>(VD->getCanonicalDecl());
      const Expr *IE = L.second.back().getAssociatedExpression();
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it.
      if (It != Info.end()) {
        auto CI = std::find_if(
            It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
        // If we found a map entry, signal that the pointer has to be returned
        // and move on to the next declaration.
        if (CI != It->second.end()) {
          CI->ReturnDevicePointer = true;
          continue;
        }
      }

      // We didn't find any match in our map information - generate a zero
      // size array section - if the pointer is a struct member we defer this
      // action until the whole struct has been processed.
      // FIXME: MSVC 2013 seems to require this-> to find member CGF.
      if (isa<MemberExpr>(IE)) {
        // Insert the pointer into Info to be processed by
        // generateInfoForComponentList. Because it is a member pointer
        // without a pointee, no entry will be generated for it, therefore
        // we need to generate one after the whole struct has been processed.
        // Nonetheless, generateInfoForComponentList must be called to take
        // the pointer into account for the calculation of the range of the
        // partial struct.
        InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit());
        DeferredInfo[nullptr].emplace_back(IE, VD);
      } else {
        // Not a struct member: emit the zero-size entry right away, directly
        // into the output arrays.
        llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
            this->CGF.EmitLValue(IE), IE->getExprLoc());
        BasePointers.emplace_back(Ptr, VD);
        Pointers.push_back(Ptr);
        Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
        Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
      }
    }
  }

  for (const auto &M : Info) {
    // We need to know when we generate information for the first component
    // associated with a capture, because the mapping flags depend on it.
    bool IsFirstComponentList = true;

    // Temporary versions of arrays
    MapBaseValuesArrayTy CurBasePointers;
    MapValuesArrayTy CurPointers;
    MapValuesArrayTy CurSizes;
    MapFlagsArrayTy CurTypes;
    StructRangeInfoTy PartialStruct;

    for (const MapInfo &L : M.second) {
      assert(!L.Components.empty() &&
             "Not expecting declaration with no component lists.");

      // Remember the current base pointer index.
      unsigned CurrentBasePointersIdx = CurBasePointers.size();
      // FIXME: MSVC 2013 seems to require this-> to find the member method.
      this->generateInfoForComponentList(
          L.MapType, L.MapModifiers, L.Components, CurBasePointers,
          CurPointers, CurSizes, CurTypes, PartialStruct,
          IsFirstComponentList, L.IsImplicit);

      // If this entry relates with a device pointer, set the relevant
      // declaration and add the 'return pointer' flag.
      if (L.ReturnDevicePointer) {
        assert(CurBasePointers.size() > CurrentBasePointersIdx &&
               "Unexpected number of mapped base pointers.");

        const ValueDecl *RelevantVD =
            L.Components.back().getAssociatedDeclaration();
        assert(RelevantVD &&
               "No relevant declaration related with device pointer??");

        CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
        CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
      }
      IsFirstComponentList = false;
    }

    // Append any pending zero-length pointers which are struct members and
    // used with use_device_ptr.
    auto CI = DeferredInfo.find(M.first);
    if (CI != DeferredInfo.end()) {
      for (const DeferredDevicePtrEntryTy &L : CI->second) {
        llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
        llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
            this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
        CurBasePointers.emplace_back(BasePtr, L.VD);
        CurPointers.push_back(Ptr);
        CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
        // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
        // value MEMBER_OF=FFFF so that the entry is later updated with the
        // correct value of MEMBER_OF.
        CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                           OMP_MAP_MEMBER_OF);
      }
    }

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry. It is written
    // to the output arrays before the per-member entries appended below.
    if (PartialStruct.Base.isValid())
      emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                        PartialStruct);

    // We need to append the results of this capture to what we already have.
    BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
    Pointers.append(CurPointers.begin(), CurPointers.end());
    Sizes.append(CurSizes.begin(), CurSizes.end());
    Types.append(CurTypes.begin(), CurTypes.end());
  }
}

/// Emit capture info for lambdas for variables captured by reference.
///
/// Does nothing unless the (non-reference, canonical) type of \a VD is a
/// lambda class. Otherwise one entry is appended to \a BasePointers,
/// \a Pointers, \a Sizes and \a Types for the captured 'this' (if any) and for
/// every by-reference capture. Each such entry is flagged
/// PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT, and \a LambdaPointers records
/// the address of the lambda object (built from \a Arg) for the entry's base
/// pointer so that adjustMemberOfForLambdaCaptures can find the parent later.
void generateInfoForLambdaCaptures(
    const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
    MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
    MapFlagsArrayTy &Types,
    llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
  const auto *RD = VD->getType()
                       .getCanonicalType()
                       .getNonReferenceType()
                       ->getAsCXXRecordDecl();
  if (!RD || !RD->isLambda())
    return;
  // LValue of the lambda object itself, addressed through Arg.
  Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
  LValue VDLVal = CGF.MakeAddrLValue(
      VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
  llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
  FieldDecl *ThisCapture = nullptr;
  RD->getCaptureFields(Captures, ThisCapture);
  if (ThisCapture) {
    LValue ThisLVal =
        CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
    LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
    LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
    BasePointers.push_back(ThisLVal.getPointer());
    Pointers.push_back(ThisLValVal.getPointer());
    Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
    Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
  }
  for (const LambdaCapture &LC : RD->captures()) {
    // Only by-reference captures get an entry.
    if (LC.getCaptureKind() != LCK_ByRef)
      continue;
    const VarDecl *VD = LC.getCapturedVar();
    auto It = Captures.find(VD);
    assert(It != Captures.end() && "Found lambda capture without field.");
    LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
    LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
    LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
    BasePointers.push_back(VarLVal.getPointer());
    Pointers.push_back(VarLValVal.getPointer());
    // The size is that of the captured variable's non-reference type.
    Sizes.push_back(CGF.getTypeSize(
        VD->getType().getCanonicalType().getNonReferenceType()));
    Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
  }
}

/// Set correct indices for lambdas captures.
///
/// Every entry of \a Types that is exactly
/// PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT is treated as a lambda capture
/// entry: the lambda address recorded for its base pointer in
/// \a LambdaPointers is searched for among the preceding entries of
/// \a Pointers, and the index found is used to build the entry's MEMBER_OF
/// flag.
void adjustMemberOfForLambdaCaptures(
    const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
    MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
    MapFlagsArrayTy &Types) const {
  for (unsigned I = 0, E = Types.size(); I < E; ++I) {
    // Set correct member_of idx for all implicit lambda captures.
    if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                     OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
      continue;
    llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
    assert(BasePtr && "Unable to find base lambda address.");
    // Search backwards from the entry just before I for the closest entry
    // whose pointer is the lambda object.
    int TgtIdx = -1;
    for (unsigned J = I; J > 0; --J) {
      unsigned Idx = J - 1;
      if (Pointers[Idx] != BasePtr)
        continue;
      TgtIdx = Idx;
      break;
    }
    assert(TgtIdx != -1 && "Unable to find parent lambda.");
    // Make this capture entry MEMBER_OF the parent lambda entry found above.
    OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
    setCorrectMemberOfFlag(Types[I], MemberOfFlag);
  }
}

/// Generate the base pointers, section pointers, sizes and map types
/// associated to a given capture.
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                            llvm::Value *Arg,
                            MapBaseValuesArrayTy &BasePointers,
                            MapValuesArrayTy &Pointers,
                            MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                            StructRangeInfoTy &PartialStruct) const {
  assert(!Cap->capturesVariableArrayType() &&
         "Not expecting to generate map info for a variable array type!");

  // A 'this' capture has no associated declaration, so it is represented by
  // a null declaration; otherwise use the canonical captured variable.
  const ValueDecl *VD = Cap->capturesThis()
                            ? nullptr
                            : Cap->getCapturedVar()->getCanonicalDecl();

  // If this declaration appears in a is_device_ptr clause we just have to
  // pass the pointer by value. If it is a reference to a declaration, we just
  // pass its value.
  if (DevPointersMap.count(VD)) {
    BasePointers.emplace_back(Arg, VD);
    Pointers.push_back(Arg);
    Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
    Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
    return;
  }

  // Tuple layout: component list, map type, map modifiers, is-implicit flag.
  using MapData =
      std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
  SmallVector<MapData, 4> DeclComponentLists;
  // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
  for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
    for (const auto &L : C->decl_component_lists(VD)) {
      assert(L.first == VD &&
             "We got information for the wrong declaration??");
      assert(!L.second.empty() &&
             "Not expecting declaration with no component lists.");
      DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                      C->getMapTypeModifiers(),
                                      C->isImplicit());
    }
  }

  // Find overlapping elements (including the offset from the base element).
  // The map is keyed by the address of the MapData inside DeclComponentLists,
  // which is not modified after this point.
  llvm::SmallDenseMap<
      const MapData *,
      llvm::SmallVector<
          OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
      4>
      OverlappedData;
  size_t Count = 0;
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
    ++Count;
    // Compare L only against the lists that follow it, so that every pair is
    // examined once.
    for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
      std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
      // Walk both lists from their last component towards the first for as
      // long as the components agree.
      auto CI = Components.rbegin();
      auto CE = Components.rend();
      auto SI = Components1.rbegin();
      auto SE = Components1.rend();
      for (; CI != CE && SI != SE; ++CI, ++SI) {
        if (CI->getAssociatedExpression()->getStmtClass() !=
            SI->getAssociatedExpression()->getStmtClass())
          break;
        // Are we dealing with different variables/fields?
        if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
          break;
      }
      // Found overlapping if, at least for one component, reached the head of
      // the components list.
      if (CI == CE || SI == SE) {
        assert((CI != CE || SI != SE) &&
               "Unexpected full match of the mapping components.");
        // The list that was exhausted is the base; the other one is recorded
        // as overlapping it.
        const MapData &BaseData = CI == CE ? L : L1;
        OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
            SI == SE ? Components : Components1;
        auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
        OverlappedElements.getSecond().push_back(SubData);
      }
    }
  }
  // Sort the overlapped elements for each item.
  llvm::SmallVector<const FieldDecl *, 4> Layout;
  if (!OverlappedData.empty()) {
    // NOTE(review): VD is null for a 'this' capture; this dereference appears
    // to assume that overlapped data is never found in that case - confirm.
    if (const auto *CRD =
            VD->getType().getCanonicalType()->getAsCXXRecordDecl())
      getPlainLayout(CRD, Layout, /*AsBase=*/false);
    else {
      const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
      Layout.append(RD->field_begin(), RD->field_end());
    }
  }
  for (auto &Pair : OverlappedData) {
    llvm::sort(
        Pair.getSecond(),
        [&Layout](
            OMPClauseMappableExprCommon::MappableExprComponentListRef First,
            OMPClauseMappableExprCommon::MappableExprComponentListRef
                Second) {
          // Skip the common leading part of both lists (walking from the last
          // component backwards).
          auto CI = First.rbegin();
          auto CE = First.rend();
          auto SI = Second.rbegin();
          auto SE = Second.rend();
          for (; CI != CE && SI != SE; ++CI, ++SI) {
            if (CI->getAssociatedExpression()->getStmtClass() !=
                SI->getAssociatedExpression()->getStmtClass())
              break;
            // Are we dealing with different variables/fields?
            if (CI->getAssociatedDeclaration() !=
                SI->getAssociatedDeclaration())
              break;
          }

          // Lists contain the same elements.
          if (CI == CE && SI == SE)
            return false;

          // List with less elements is less than list with more elements.
          if (CI == CE || SI == SE)
            return CI == CE;

          // Otherwise order by the first differing field: by field index when
          // both fields have the same parent, else by which of the two comes
          // first in Layout.
          const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
          const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
          if (FD1->getParent() == FD2->getParent())
            return FD1->getFieldIndex() < FD2->getFieldIndex();
          // NOTE(review): the result of find_if is dereferenced without a
          // check, i.e. one of the two fields is assumed to be in Layout.
          const auto It =
              llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                return FD == FD1 || FD == FD2;
              });
          return *It == FD1;
        });
  }

  // Go through all of the elements with the overlapped elements. Each of
  // them is emitted as a first component list.
  for (const auto &Pair : OverlappedData) {
    const MapData &L = *Pair.getFirst();
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
    ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
        OverlappedComponents = Pair.getSecond();
    bool IsFirstComponentList = true;
    generateInfoForComponentList(MapType, MapModifiers, Components,
                                 BasePointers, Pointers, Sizes, Types,
                                 PartialStruct, IsFirstComponentList,
                                 IsImplicit, OverlappedComponents);
  }
  // Go through other elements without overlapped elements. The first list
  // visited counts as the first component list only if nothing was emitted
  // by the loop above.
  bool IsFirstComponentList = OverlappedData.empty();
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
    auto It = OverlappedData.find(&L);
    if (It == OverlappedData.end())
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit);
    IsFirstComponentList = false;
  }
}

/// Generate the base pointers, section pointers, sizes and map types
/// associated with the declare target link variables.
void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
                                      MapValuesArrayTy &Pointers,
                                      MapValuesArrayTy &Sizes,
                                      MapFlagsArrayTy &Types) const {
  // Map other list items in the map clause which are not captured variables
  // but "declare target link" global variables.
  for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
    for (const auto &L : C->component_lists()) {
      // Skip lists without a declaration or whose declaration is not a
      // variable.
      if (!L.first)
        continue;
      const auto *VD = dyn_cast<VarDecl>(L.first);
      if (!VD)
        continue;
      // Only variables marked as declare target 'link' are handled here.
      llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
          OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
      if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
        continue;
      StructRangeInfoTy PartialStruct;
      generateInfoForComponentList(
          C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
          Pointers, Sizes, Types, PartialStruct,
          /*IsFirstComponentList=*/true, C->isImplicit());
      assert(!PartialStruct.Base.isValid() &&
             "No partial structs for declare target link expected.");
    }
  }
}

/// Generate the default map information for a given capture \a CI,
/// record field declaration \a RI and captured value \a CV.
///
/// Exactly one entry is appended to each of the four output arrays:
///  - capture of 'this': base and pointer are \a CV, the size is that of the
///    pointee type of \a RI and the map type is 'tofrom';
///  - capture by copy: base and pointer are \a CV; a non-pointer is passed as
///    a literal with the size of its type, a pointer gets no flags and a zero
///    size;
///  - capture by reference: the size is that of the referenced type and the
///    map type comes from getMapModifiersForPrivateClauses; base and pointer
///    are \a CV unless the variable is a constant firstprivate, in which case
///    a global copy is registered, initialized from \a CV and used instead.
/// In all cases the entry is additionally flagged TARGET_PARAM and IMPLICIT.
void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                            const FieldDecl &RI, llvm::Value *CV,
                            MapBaseValuesArrayTy &CurBasePointers,
                            MapValuesArrayTy &CurPointers,
                            MapValuesArrayTy &CurSizes,
                            MapFlagsArrayTy &CurMapTypes) const {
  // Do the default mapping.
  if (CI.capturesThis()) {
    CurBasePointers.push_back(CV);
    CurPointers.push_back(CV);
    const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
    CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
    // Default map type.
    CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
  } else if (CI.capturesVariableByCopy()) {
    CurBasePointers.push_back(CV);
    CurPointers.push_back(CV);
    if (!RI.getType()->isAnyPointerType()) {
      // We have to signal to the runtime captures passed by value that are
      // not pointers.
      CurMapTypes.push_back(OMP_MAP_LITERAL);
      CurSizes.push_back(CGF.getTypeSize(RI.getType()));
    } else {
      // Pointers are implicitly mapped with a zero size and no flags
      // (other than first map that is added for all implicit maps).
      CurMapTypes.push_back(OMP_MAP_NONE);
      CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
    }
  } else {
    assert(CI.capturesVariable() && "Expected captured reference.");
    const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
    QualType ElementType = PtrTy->getPointeeType();
    CurSizes.push_back(CGF.getTypeSize(ElementType));
    // The default map type for a scalar/complex type is 'to' because by
    // default the value doesn't have to be retrieved. For an aggregate
    // type, the default is 'tofrom'.
    CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
    const VarDecl *VD = CI.getCapturedVar();
    if (FirstPrivateDecls.count(VD) &&
        VD->getType().isConstant(CGF.getContext())) {
      llvm::Constant *Addr =
          CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
      // Copy the value of the original variable to the new global copy. The
      // number of bytes copied is the size pushed just above.
      CGF.Builder.CreateMemCpy(
          CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
          Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
          CurSizes.back(),
          /*isVolatile=*/false);
      // Use new global variable as the base pointers.
      CurBasePointers.push_back(Addr);
      CurPointers.push_back(Addr);
    } else {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
    }
  }
  // Every default map produces a single argument which is a target parameter.
  CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

  // Add flag stating this is an implicit map.
  CurMapTypes.back() |= OMP_MAP_IMPLICIT;
}
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// \a Info is reset first and its number of pointers is set to the size of
/// \a BasePointers. When that number is non-zero, the base pointer and
/// pointer arrays are created as temporaries and filled with stores, the
/// sizes array is either a temporary filled with stores (when some size is not
/// a constant) or a private constant global, and the map types array is
/// always a private constant global.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    // Both pointer arrays have the type 'void *[NumberOfPtrs]'.
    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType =
        Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant. The flags are
    // widened to 64-bit integers first.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Fill in slot I of each runtime-filled array.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      // Base pointer: address the slot, cast it to a pointer to the value's
      // own type and store the value.
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember the slot address for base pointers that carry a device
      // pointer declaration, when the caller asked for that information.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      // Pointer: same scheme as for the base pointer.
      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Size: only stored when the sizes array is not a constant global.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.SizeTy, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of pointers, sizes and map types.
8081 static void emitOffloadingArraysArgument( 8082 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 8083 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 8084 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 8085 CodeGenModule &CGM = CGF.CGM; 8086 if (Info.NumberOfPtrs) { 8087 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8088 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8089 Info.BasePointersArray, 8090 /*Idx0=*/0, /*Idx1=*/0); 8091 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8092 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8093 Info.PointersArray, 8094 /*Idx0=*/0, 8095 /*Idx1=*/0); 8096 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8097 llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray, 8098 /*Idx0=*/0, /*Idx1=*/0); 8099 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8100 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8101 Info.MapTypesArray, 8102 /*Idx0=*/0, 8103 /*Idx1=*/0); 8104 } else { 8105 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8106 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8107 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); 8108 MapTypesArrayArg = 8109 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8110 } 8111 } 8112 8113 /// Checks if the expression is constant or does not have non-trivial function 8114 /// calls. 8115 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 8116 // We can skip constant expressions. 8117 // We can skip expressions with trivial calls or simple expressions. 
8118 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 8119 !E->hasNonTrivialCall(Ctx)) && 8120 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 8121 } 8122 8123 /// Checks if the \p Body is the \a CompoundStmt and returns its child statement 8124 /// iff there is only one that is not evaluatable at the compile time. 8125 static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) { 8126 if (const auto *C = dyn_cast<CompoundStmt>(Body)) { 8127 const Stmt *Child = nullptr; 8128 for (const Stmt *S : C->body()) { 8129 if (const auto *E = dyn_cast<Expr>(S)) { 8130 if (isTrivial(Ctx, E)) 8131 continue; 8132 } 8133 // Some of the statements can be ignored. 8134 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 8135 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 8136 continue; 8137 // Analyze declarations. 8138 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 8139 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 8140 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 8141 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 8142 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 8143 isa<UsingDirectiveDecl>(D) || 8144 isa<OMPDeclareReductionDecl>(D) || 8145 isa<OMPThreadPrivateDecl>(D)) 8146 return true; 8147 const auto *VD = dyn_cast<VarDecl>(D); 8148 if (!VD) 8149 return false; 8150 return VD->isConstexpr() || 8151 ((VD->getType().isTrivialType(Ctx) || 8152 VD->getType()->isReferenceType()) && 8153 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 8154 })) 8155 continue; 8156 } 8157 // Found multiple children - cannot get the one child only. 8158 if (Child) 8159 return Body; 8160 Child = S; 8161 } 8162 if (Child) 8163 return Child; 8164 } 8165 return Body; 8166 } 8167 8168 /// Check for inner distribute directive. 
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  // Find the single relevant child of the innermost captured statement of D.
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // Either the distribute directive is nested directly, or it is the
      // single child of a nested 'teams' directive.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = getSingleCompoundChild(Ctx, Body);
        if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    // For these target directives no nested distribute directive is looked
    // for.
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    // Every remaining directive kind is not expected as D here.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected directive.");
    }
  }

  // The single child is not an OpenMP executable directive.
  return nullptr;
}

/// Emit the runtime call that passes the number of loop iterations of a
/// target region, as computed by \a SizeEmitter, together with the device id.
///
/// The loop directive used is \a D itself when it is both a distribute and a
/// teams directive, otherwise the nested distribute directive found by
/// getNestedDistributeDirective. Nothing is emitted if there is none.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
    const llvm::function_ref<llvm::Value *(
        CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    llvm::Value *NumIterations = SizeEmitter(CGF, *LD);

    // Emit device ID if any. It is passed as a signed 64-bit integer, and
    // OMP_DEVICEID_UNDEF is used when no device expression was given.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);

    llvm::Value *Args[] = {DeviceID, NumIterations};
    CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Function *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask,
                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything.
It could be the pointer to the outlined function that 8321 // implements the target region, but we aren't using that so that the 8322 // compiler doesn't need to keep that, and could therefore inline the host 8323 // function if proven worthwhile during optimization. 8324 8325 // From this point on, we need to have an ID of the target region defined. 8326 assert(OutlinedFnID && "Invalid outlined function ID!"); 8327 8328 // Emit device ID if any. 8329 llvm::Value *DeviceID; 8330 if (Device) { 8331 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 8332 CGF.Int64Ty, /*isSigned=*/true); 8333 } else { 8334 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 8335 } 8336 8337 // Emit the number of elements in the offloading arrays. 8338 llvm::Value *PointerNum = 8339 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 8340 8341 // Return value of the runtime offloading call. 8342 llvm::Value *Return; 8343 8344 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(*this, CGF, D); 8345 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(*this, CGF, D); 8346 8347 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 8348 // The target region is an outlined function launched by the runtime 8349 // via calls __tgt_target() or __tgt_target_teams(). 8350 // 8351 // __tgt_target() launches a target region with one team and one thread, 8352 // executing a serial region. This master thread may in turn launch 8353 // more threads within its team upon encountering a parallel region, 8354 // however, no additional teams can be launched on the device. 8355 // 8356 // __tgt_target_teams() launches a target region with one or more teams, 8357 // each with one or more threads. This call is required for target 8358 // constructs such as: 8359 // 'target teams' 8360 // 'target' / 'teams' 8361 // 'target teams distribute parallel for' 8362 // 'target parallel' 8363 // and so on. 
8364 // 8365 // Note that on the host and CPU targets, the runtime implementation of 8366 // these calls simply call the outlined function without forking threads. 8367 // The outlined functions themselves have runtime calls to 8368 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 8369 // the compiler in emitTeamsCall() and emitParallelCall(). 8370 // 8371 // In contrast, on the NVPTX target, the implementation of 8372 // __tgt_target_teams() launches a GPU kernel with the requested number 8373 // of teams and threads so no additional calls to the runtime are required. 8374 if (NumTeams) { 8375 // If we have NumTeams defined this means that we have an enclosed teams 8376 // region. Therefore we also expect to have NumThreads defined. These two 8377 // values should be defined in the presence of a teams directive, 8378 // regardless of having any clauses associated. If the user is using teams 8379 // but no clauses, these two values will be the default that should be 8380 // passed to the runtime library - a 32-bit integer with the value zero. 8381 assert(NumThreads && "Thread limit expression should be available along " 8382 "with number of teams."); 8383 llvm::Value *OffloadingArgs[] = {DeviceID, 8384 OutlinedFnID, 8385 PointerNum, 8386 InputInfo.BasePointersArray.getPointer(), 8387 InputInfo.PointersArray.getPointer(), 8388 InputInfo.SizesArray.getPointer(), 8389 MapTypesArray, 8390 NumTeams, 8391 NumThreads}; 8392 Return = CGF.EmitRuntimeCall( 8393 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait 8394 : OMPRTL__tgt_target_teams), 8395 OffloadingArgs); 8396 } else { 8397 llvm::Value *OffloadingArgs[] = {DeviceID, 8398 OutlinedFnID, 8399 PointerNum, 8400 InputInfo.BasePointersArray.getPointer(), 8401 InputInfo.PointersArray.getPointer(), 8402 InputInfo.SizesArray.getPointer(), 8403 MapTypesArray}; 8404 Return = CGF.EmitRuntimeCall( 8405 createRuntimeFunction(HasNowait ? 
OMPRTL__tgt_target_nowait 8406 : OMPRTL__tgt_target), 8407 OffloadingArgs); 8408 } 8409 8410 // Check the error code and execute the host version if required. 8411 llvm::BasicBlock *OffloadFailedBlock = 8412 CGF.createBasicBlock("omp_offload.failed"); 8413 llvm::BasicBlock *OffloadContBlock = 8414 CGF.createBasicBlock("omp_offload.cont"); 8415 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 8416 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 8417 8418 CGF.EmitBlock(OffloadFailedBlock); 8419 if (RequiresOuterTask) { 8420 CapturedVars.clear(); 8421 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 8422 } 8423 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 8424 CGF.EmitBranch(OffloadContBlock); 8425 8426 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 8427 }; 8428 8429 // Notify that the host version must be executed. 8430 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 8431 RequiresOuterTask](CodeGenFunction &CGF, 8432 PrePostActionTy &) { 8433 if (RequiresOuterTask) { 8434 CapturedVars.clear(); 8435 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 8436 } 8437 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 8438 }; 8439 8440 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 8441 &CapturedVars, RequiresOuterTask, 8442 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 8443 // Fill up the arrays with all the captured variables. 8444 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 8445 MappableExprsHandler::MapValuesArrayTy Pointers; 8446 MappableExprsHandler::MapValuesArrayTy Sizes; 8447 MappableExprsHandler::MapFlagsArrayTy MapTypes; 8448 8449 // Get mappable expression information. 
8450 MappableExprsHandler MEHandler(D, CGF); 8451 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 8452 8453 auto RI = CS.getCapturedRecordDecl()->field_begin(); 8454 auto CV = CapturedVars.begin(); 8455 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 8456 CE = CS.capture_end(); 8457 CI != CE; ++CI, ++RI, ++CV) { 8458 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 8459 MappableExprsHandler::MapValuesArrayTy CurPointers; 8460 MappableExprsHandler::MapValuesArrayTy CurSizes; 8461 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 8462 MappableExprsHandler::StructRangeInfoTy PartialStruct; 8463 8464 // VLA sizes are passed to the outlined region by copy and do not have map 8465 // information associated. 8466 if (CI->capturesVariableArrayType()) { 8467 CurBasePointers.push_back(*CV); 8468 CurPointers.push_back(*CV); 8469 CurSizes.push_back(CGF.getTypeSize(RI->getType())); 8470 // Copy to the device as an argument. No need to retrieve it. 8471 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 8472 MappableExprsHandler::OMP_MAP_TARGET_PARAM); 8473 } else { 8474 // If we have any information in the map clause, we use it, otherwise we 8475 // just do a default mapping. 8476 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, 8477 CurSizes, CurMapTypes, PartialStruct); 8478 if (CurBasePointers.empty()) 8479 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, 8480 CurPointers, CurSizes, CurMapTypes); 8481 // Generate correct mapping for variables captured by reference in 8482 // lambdas. 8483 if (CI->capturesVariable()) 8484 MEHandler.generateInfoForLambdaCaptures( 8485 CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes, 8486 CurMapTypes, LambdaPointers); 8487 } 8488 // We expect to have at least an element of information for this capture. 
8489 assert(!CurBasePointers.empty() && 8490 "Non-existing map pointer for capture!"); 8491 assert(CurBasePointers.size() == CurPointers.size() && 8492 CurBasePointers.size() == CurSizes.size() && 8493 CurBasePointers.size() == CurMapTypes.size() && 8494 "Inconsistent map information sizes!"); 8495 8496 // If there is an entry in PartialStruct it means we have a struct with 8497 // individual members mapped. Emit an extra combined entry. 8498 if (PartialStruct.Base.isValid()) 8499 MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes, 8500 CurMapTypes, PartialStruct); 8501 8502 // We need to append the results of this capture to what we already have. 8503 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8504 Pointers.append(CurPointers.begin(), CurPointers.end()); 8505 Sizes.append(CurSizes.begin(), CurSizes.end()); 8506 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); 8507 } 8508 // Adjust MEMBER_OF flags for the lambdas captures. 8509 MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers, 8510 Pointers, MapTypes); 8511 // Map other list items in the map clause which are not captured variables 8512 // but "declare target link" global variables. 8513 MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, 8514 MapTypes); 8515 8516 TargetDataInfo Info; 8517 // Fill up the arrays and create the arguments. 
8518 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 8519 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 8520 Info.PointersArray, Info.SizesArray, 8521 Info.MapTypesArray, Info); 8522 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 8523 InputInfo.BasePointersArray = 8524 Address(Info.BasePointersArray, CGM.getPointerAlign()); 8525 InputInfo.PointersArray = 8526 Address(Info.PointersArray, CGM.getPointerAlign()); 8527 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 8528 MapTypesArray = Info.MapTypesArray; 8529 if (RequiresOuterTask) 8530 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 8531 else 8532 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 8533 }; 8534 8535 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 8536 CodeGenFunction &CGF, PrePostActionTy &) { 8537 if (RequiresOuterTask) { 8538 CodeGenFunction::OMPTargetDataInfo InputInfo; 8539 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 8540 } else { 8541 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 8542 } 8543 }; 8544 8545 // If we have a target function ID it means that we need to support 8546 // offloading, otherwise, just execute on the host. We need to execute on host 8547 // regardless of the conditional in the if clause if, e.g., the user do not 8548 // specify target triples. 8549 if (OutlinedFnID) { 8550 if (IfCond) { 8551 emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 8552 } else { 8553 RegionCodeGenTy ThenRCG(TargetThenGen); 8554 ThenRCG(CGF); 8555 } 8556 } else { 8557 RegionCodeGenTy ElseRCG(TargetElseGen); 8558 ElseRCG(CGF); 8559 } 8560 } 8561 8562 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 8563 StringRef ParentName) { 8564 if (!S) 8565 return; 8566 8567 // Codegen OMP target directives that offload compute to the device. 
8568 bool RequiresDeviceCodegen = 8569 isa<OMPExecutableDirective>(S) && 8570 isOpenMPTargetExecutionDirective( 8571 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 8572 8573 if (RequiresDeviceCodegen) { 8574 const auto &E = *cast<OMPExecutableDirective>(S); 8575 unsigned DeviceID; 8576 unsigned FileID; 8577 unsigned Line; 8578 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 8579 FileID, Line); 8580 8581 // Is this a target region that should not be emitted as an entry point? If 8582 // so just signal we are done with this target region. 8583 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 8584 ParentName, Line)) 8585 return; 8586 8587 switch (E.getDirectiveKind()) { 8588 case OMPD_target: 8589 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 8590 cast<OMPTargetDirective>(E)); 8591 break; 8592 case OMPD_target_parallel: 8593 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 8594 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 8595 break; 8596 case OMPD_target_teams: 8597 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 8598 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 8599 break; 8600 case OMPD_target_teams_distribute: 8601 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 8602 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 8603 break; 8604 case OMPD_target_teams_distribute_simd: 8605 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 8606 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 8607 break; 8608 case OMPD_target_parallel_for: 8609 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 8610 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 8611 break; 8612 case OMPD_target_parallel_for_simd: 8613 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 8614 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 8615 break; 8616 case OMPD_target_simd: 8617 
CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 8618 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 8619 break; 8620 case OMPD_target_teams_distribute_parallel_for: 8621 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 8622 CGM, ParentName, 8623 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 8624 break; 8625 case OMPD_target_teams_distribute_parallel_for_simd: 8626 CodeGenFunction:: 8627 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 8628 CGM, ParentName, 8629 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 8630 break; 8631 case OMPD_parallel: 8632 case OMPD_for: 8633 case OMPD_parallel_for: 8634 case OMPD_parallel_sections: 8635 case OMPD_for_simd: 8636 case OMPD_parallel_for_simd: 8637 case OMPD_cancel: 8638 case OMPD_cancellation_point: 8639 case OMPD_ordered: 8640 case OMPD_threadprivate: 8641 case OMPD_task: 8642 case OMPD_simd: 8643 case OMPD_sections: 8644 case OMPD_section: 8645 case OMPD_single: 8646 case OMPD_master: 8647 case OMPD_critical: 8648 case OMPD_taskyield: 8649 case OMPD_barrier: 8650 case OMPD_taskwait: 8651 case OMPD_taskgroup: 8652 case OMPD_atomic: 8653 case OMPD_flush: 8654 case OMPD_teams: 8655 case OMPD_target_data: 8656 case OMPD_target_exit_data: 8657 case OMPD_target_enter_data: 8658 case OMPD_distribute: 8659 case OMPD_distribute_simd: 8660 case OMPD_distribute_parallel_for: 8661 case OMPD_distribute_parallel_for_simd: 8662 case OMPD_teams_distribute: 8663 case OMPD_teams_distribute_simd: 8664 case OMPD_teams_distribute_parallel_for: 8665 case OMPD_teams_distribute_parallel_for_simd: 8666 case OMPD_target_update: 8667 case OMPD_declare_simd: 8668 case OMPD_declare_target: 8669 case OMPD_end_declare_target: 8670 case OMPD_declare_reduction: 8671 case OMPD_declare_mapper: 8672 case OMPD_taskloop: 8673 case OMPD_taskloop_simd: 8674 case OMPD_requires: 8675 case OMPD_unknown: 8676 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 8677 } 8678 return; 8679 
} 8680 8681 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 8682 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 8683 return; 8684 8685 scanForTargetRegionsFunctions( 8686 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 8687 return; 8688 } 8689 8690 // If this is a lambda function, look into its body. 8691 if (const auto *L = dyn_cast<LambdaExpr>(S)) 8692 S = L->getBody(); 8693 8694 // Keep looking for target regions recursively. 8695 for (const Stmt *II : S->children()) 8696 scanForTargetRegionsFunctions(II, ParentName); 8697 } 8698 8699 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 8700 // If emitting code for the host, we do not process FD here. Instead we do 8701 // the normal code generation. 8702 if (!CGM.getLangOpts().OpenMPIsDevice) 8703 return false; 8704 8705 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 8706 StringRef Name = CGM.getMangledName(GD); 8707 // Try to detect target regions in the function. 8708 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) 8709 scanForTargetRegionsFunctions(FD->getBody(), Name); 8710 8711 // Do not to emit function if it is not marked as declare target. 8712 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 8713 AlreadyEmittedTargetFunctions.count(Name) == 0; 8714 } 8715 8716 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 8717 if (!CGM.getLangOpts().OpenMPIsDevice) 8718 return false; 8719 8720 // Check if there are Ctors/Dtors in this declaration and look for target 8721 // regions in it. We use the complete variant to produce the kernel name 8722 // mangling. 
8723 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 8724 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 8725 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 8726 StringRef ParentName = 8727 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 8728 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 8729 } 8730 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 8731 StringRef ParentName = 8732 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 8733 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 8734 } 8735 } 8736 8737 // Do not to emit variable if it is not marked as declare target. 8738 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 8739 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 8740 cast<VarDecl>(GD.getDecl())); 8741 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) { 8742 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 8743 return true; 8744 } 8745 return false; 8746 } 8747 8748 llvm::Constant * 8749 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 8750 const VarDecl *VD) { 8751 assert(VD->getType().isConstant(CGM.getContext()) && 8752 "Expected constant variable."); 8753 StringRef VarName; 8754 llvm::Constant *Addr; 8755 llvm::GlobalValue::LinkageTypes Linkage; 8756 QualType Ty = VD->getType(); 8757 SmallString<128> Buffer; 8758 { 8759 unsigned DeviceID; 8760 unsigned FileID; 8761 unsigned Line; 8762 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 8763 FileID, Line); 8764 llvm::raw_svector_ostream OS(Buffer); 8765 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 8766 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 8767 VarName = OS.str(); 8768 } 8769 Linkage = llvm::GlobalValue::InternalLinkage; 8770 Addr = 8771 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 8772 getDefaultFirstprivateAddressSpace()); 8773 
cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 8774 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 8775 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 8776 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 8777 VarName, Addr, VarSize, 8778 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 8779 return Addr; 8780 } 8781 8782 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 8783 llvm::Constant *Addr) { 8784 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 8785 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 8786 if (!Res) { 8787 if (CGM.getLangOpts().OpenMPIsDevice) { 8788 // Register non-target variables being emitted in device code (debug info 8789 // may cause this). 8790 StringRef VarName = CGM.getMangledName(VD); 8791 EmittedNonTargetVariables.try_emplace(VarName, Addr); 8792 } 8793 return; 8794 } 8795 // Register declare target variables. 8796 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 8797 StringRef VarName; 8798 CharUnits VarSize; 8799 llvm::GlobalValue::LinkageTypes Linkage; 8800 switch (*Res) { 8801 case OMPDeclareTargetDeclAttr::MT_To: 8802 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 8803 VarName = CGM.getMangledName(VD); 8804 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 8805 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 8806 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 8807 } else { 8808 VarSize = CharUnits::Zero(); 8809 } 8810 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 8811 // Temp solution to prevent optimizations of the internal variables. 
8812 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 8813 std::string RefName = getName({VarName, "ref"}); 8814 if (!CGM.GetGlobalValue(RefName)) { 8815 llvm::Constant *AddrRef = 8816 getOrCreateInternalVariable(Addr->getType(), RefName); 8817 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 8818 GVAddrRef->setConstant(/*Val=*/true); 8819 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 8820 GVAddrRef->setInitializer(Addr); 8821 CGM.addCompilerUsedGlobal(GVAddrRef); 8822 } 8823 } 8824 break; 8825 case OMPDeclareTargetDeclAttr::MT_Link: 8826 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 8827 if (CGM.getLangOpts().OpenMPIsDevice) { 8828 VarName = Addr->getName(); 8829 Addr = nullptr; 8830 } else { 8831 VarName = getAddrOfDeclareTargetLink(VD).getName(); 8832 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer()); 8833 } 8834 VarSize = CGM.getPointerSize(); 8835 Linkage = llvm::GlobalValue::WeakAnyLinkage; 8836 break; 8837 } 8838 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 8839 VarName, Addr, VarSize, Flags, Linkage); 8840 } 8841 8842 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 8843 if (isa<FunctionDecl>(GD.getDecl()) || 8844 isa<OMPDeclareReductionDecl>(GD.getDecl())) 8845 return emitTargetFunctions(GD); 8846 8847 return emitTargetGlobalVariable(GD); 8848 } 8849 8850 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 8851 for (const VarDecl *VD : DeferredGlobalVariables) { 8852 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 8853 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 8854 if (!Res) 8855 continue; 8856 if (*Res == OMPDeclareTargetDeclAttr::MT_To) { 8857 CGM.EmitGlobal(VD); 8858 } else { 8859 assert(*Res == OMPDeclareTargetDeclAttr::MT_Link && 8860 "Expected to or link clauses."); 8861 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); 8862 } 8863 } 8864 } 8865 8866 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 
8867 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 8868 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 8869 " Expected target-based directive."); 8870 } 8871 8872 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 8873 CodeGenModule &CGM) 8874 : CGM(CGM) { 8875 if (CGM.getLangOpts().OpenMPIsDevice) { 8876 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 8877 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 8878 } 8879 } 8880 8881 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 8882 if (CGM.getLangOpts().OpenMPIsDevice) 8883 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 8884 } 8885 8886 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 8887 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 8888 return true; 8889 8890 StringRef Name = CGM.getMangledName(GD); 8891 const auto *D = cast<FunctionDecl>(GD.getDecl()); 8892 // Do not to emit function if it is marked as declare target as it was already 8893 // emitted. 8894 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 8895 if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) { 8896 if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name))) 8897 return !F->isDeclaration(); 8898 return false; 8899 } 8900 return true; 8901 } 8902 8903 return !AlreadyEmittedTargetFunctions.insert(Name).second; 8904 } 8905 8906 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { 8907 // If we have offloading in the current module, we need to emit the entries 8908 // now and register the offloading descriptor. 8909 createOffloadEntriesAndInfoMetadata(); 8910 8911 // Create and register the offloading binary descriptors. This is the main 8912 // entity that captures all the information about offloading in the current 8913 // compilation unit. 
8914 return createOffloadingBinaryDescriptorRegistration(); 8915 } 8916 8917 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 8918 const OMPExecutableDirective &D, 8919 SourceLocation Loc, 8920 llvm::Function *OutlinedFn, 8921 ArrayRef<llvm::Value *> CapturedVars) { 8922 if (!CGF.HaveInsertPoint()) 8923 return; 8924 8925 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 8926 CodeGenFunction::RunCleanupsScope Scope(CGF); 8927 8928 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 8929 llvm::Value *Args[] = { 8930 RTLoc, 8931 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 8932 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 8933 llvm::SmallVector<llvm::Value *, 16> RealArgs; 8934 RealArgs.append(std::begin(Args), std::end(Args)); 8935 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 8936 8937 llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 8938 CGF.EmitRuntimeCall(RTLFn, RealArgs); 8939 } 8940 8941 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 8942 const Expr *NumTeams, 8943 const Expr *ThreadLimit, 8944 SourceLocation Loc) { 8945 if (!CGF.HaveInsertPoint()) 8946 return; 8947 8948 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 8949 8950 llvm::Value *NumTeamsVal = 8951 NumTeams 8952 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 8953 CGF.CGM.Int32Ty, /* isSigned = */ true) 8954 : CGF.Builder.getInt32(0); 8955 8956 llvm::Value *ThreadLimitVal = 8957 ThreadLimit 8958 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 8959 CGF.CGM.Int32Ty, /* isSigned = */ true) 8960 : CGF.Builder.getInt32(0); 8961 8962 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 8963 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 8964 ThreadLimitVal}; 8965 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 8966 PushNumTeamsArgs); 8967 } 8968 8969 void CGOpenMPRuntime::emitTargetDataCalls( 8970 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 8971 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 8972 if (!CGF.HaveInsertPoint()) 8973 return; 8974 8975 // Action used to replace the default codegen action and turn privatization 8976 // off. 8977 PrePostActionTy NoPrivAction; 8978 8979 // Generate the code for the opening of the data environment. Capture all the 8980 // arguments of the runtime call by reference because they are used in the 8981 // closing of the region. 8982 auto &&BeginThenGen = [this, &D, Device, &Info, 8983 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 8984 // Fill up the arrays with all the mapped variables. 8985 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 8986 MappableExprsHandler::MapValuesArrayTy Pointers; 8987 MappableExprsHandler::MapValuesArrayTy Sizes; 8988 MappableExprsHandler::MapFlagsArrayTy MapTypes; 8989 8990 // Get map clause information. 8991 MappableExprsHandler MCHandler(D, CGF); 8992 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 8993 8994 // Fill up the arrays and create the arguments. 
8995 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 8996 8997 llvm::Value *BasePointersArrayArg = nullptr; 8998 llvm::Value *PointersArrayArg = nullptr; 8999 llvm::Value *SizesArrayArg = nullptr; 9000 llvm::Value *MapTypesArrayArg = nullptr; 9001 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 9002 SizesArrayArg, MapTypesArrayArg, Info); 9003 9004 // Emit device ID if any. 9005 llvm::Value *DeviceID = nullptr; 9006 if (Device) { 9007 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9008 CGF.Int64Ty, /*isSigned=*/true); 9009 } else { 9010 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9011 } 9012 9013 // Emit the number of elements in the offloading arrays. 9014 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 9015 9016 llvm::Value *OffloadingArgs[] = { 9017 DeviceID, PointerNum, BasePointersArrayArg, 9018 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 9019 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), 9020 OffloadingArgs); 9021 9022 // If device pointer privatization is required, emit the body of the region 9023 // here. It will have to be duplicated: with and without privatization. 9024 if (!Info.CaptureDeviceAddrMap.empty()) 9025 CodeGen(CGF); 9026 }; 9027 9028 // Generate code for the closing of the data region. 9029 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 9030 PrePostActionTy &) { 9031 assert(Info.isValid() && "Invalid data environment closing arguments."); 9032 9033 llvm::Value *BasePointersArrayArg = nullptr; 9034 llvm::Value *PointersArrayArg = nullptr; 9035 llvm::Value *SizesArrayArg = nullptr; 9036 llvm::Value *MapTypesArrayArg = nullptr; 9037 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 9038 SizesArrayArg, MapTypesArrayArg, Info); 9039 9040 // Emit device ID if any. 
9041 llvm::Value *DeviceID = nullptr; 9042 if (Device) { 9043 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9044 CGF.Int64Ty, /*isSigned=*/true); 9045 } else { 9046 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9047 } 9048 9049 // Emit the number of elements in the offloading arrays. 9050 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 9051 9052 llvm::Value *OffloadingArgs[] = { 9053 DeviceID, PointerNum, BasePointersArrayArg, 9054 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 9055 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), 9056 OffloadingArgs); 9057 }; 9058 9059 // If we need device pointer privatization, we need to emit the body of the 9060 // region with no privatization in the 'else' branch of the conditional. 9061 // Otherwise, we don't have to do anything. 9062 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 9063 PrePostActionTy &) { 9064 if (!Info.CaptureDeviceAddrMap.empty()) { 9065 CodeGen.setAction(NoPrivAction); 9066 CodeGen(CGF); 9067 } 9068 }; 9069 9070 // We don't have to do anything to close the region if the if clause evaluates 9071 // to false. 9072 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 9073 9074 if (IfCond) { 9075 emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 9076 } else { 9077 RegionCodeGenTy RCG(BeginThenGen); 9078 RCG(CGF); 9079 } 9080 9081 // If we don't require privatization of device pointers, we emit the body in 9082 // between the runtime calls. This avoids duplicating the body code. 
9083 if (Info.CaptureDeviceAddrMap.empty()) { 9084 CodeGen.setAction(NoPrivAction); 9085 CodeGen(CGF); 9086 } 9087 9088 if (IfCond) { 9089 emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen); 9090 } else { 9091 RegionCodeGenTy RCG(EndThenGen); 9092 RCG(CGF); 9093 } 9094 } 9095 9096 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 9097 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 9098 const Expr *Device) { 9099 if (!CGF.HaveInsertPoint()) 9100 return; 9101 9102 assert((isa<OMPTargetEnterDataDirective>(D) || 9103 isa<OMPTargetExitDataDirective>(D) || 9104 isa<OMPTargetUpdateDirective>(D)) && 9105 "Expecting either target enter, exit data, or update directives."); 9106 9107 CodeGenFunction::OMPTargetDataInfo InputInfo; 9108 llvm::Value *MapTypesArray = nullptr; 9109 // Generate the code for the opening of the data environment. 9110 auto &&ThenGen = [this, &D, Device, &InputInfo, 9111 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 9112 // Emit device ID if any. 9113 llvm::Value *DeviceID = nullptr; 9114 if (Device) { 9115 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9116 CGF.Int64Ty, /*isSigned=*/true); 9117 } else { 9118 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9119 } 9120 9121 // Emit the number of elements in the offloading arrays. 9122 llvm::Constant *PointerNum = 9123 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 9124 9125 llvm::Value *OffloadingArgs[] = {DeviceID, 9126 PointerNum, 9127 InputInfo.BasePointersArray.getPointer(), 9128 InputInfo.PointersArray.getPointer(), 9129 InputInfo.SizesArray.getPointer(), 9130 MapTypesArray}; 9131 9132 // Select the right runtime function call for each expected standalone 9133 // directive. 9134 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 9135 OpenMPRTLFunction RTLFn; 9136 switch (D.getDirectiveKind()) { 9137 case OMPD_target_enter_data: 9138 RTLFn = HasNowait ? 
OMPRTL__tgt_target_data_begin_nowait 9139 : OMPRTL__tgt_target_data_begin; 9140 break; 9141 case OMPD_target_exit_data: 9142 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait 9143 : OMPRTL__tgt_target_data_end; 9144 break; 9145 case OMPD_target_update: 9146 RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait 9147 : OMPRTL__tgt_target_data_update; 9148 break; 9149 case OMPD_parallel: 9150 case OMPD_for: 9151 case OMPD_parallel_for: 9152 case OMPD_parallel_sections: 9153 case OMPD_for_simd: 9154 case OMPD_parallel_for_simd: 9155 case OMPD_cancel: 9156 case OMPD_cancellation_point: 9157 case OMPD_ordered: 9158 case OMPD_threadprivate: 9159 case OMPD_task: 9160 case OMPD_simd: 9161 case OMPD_sections: 9162 case OMPD_section: 9163 case OMPD_single: 9164 case OMPD_master: 9165 case OMPD_critical: 9166 case OMPD_taskyield: 9167 case OMPD_barrier: 9168 case OMPD_taskwait: 9169 case OMPD_taskgroup: 9170 case OMPD_atomic: 9171 case OMPD_flush: 9172 case OMPD_teams: 9173 case OMPD_target_data: 9174 case OMPD_distribute: 9175 case OMPD_distribute_simd: 9176 case OMPD_distribute_parallel_for: 9177 case OMPD_distribute_parallel_for_simd: 9178 case OMPD_teams_distribute: 9179 case OMPD_teams_distribute_simd: 9180 case OMPD_teams_distribute_parallel_for: 9181 case OMPD_teams_distribute_parallel_for_simd: 9182 case OMPD_declare_simd: 9183 case OMPD_declare_target: 9184 case OMPD_end_declare_target: 9185 case OMPD_declare_reduction: 9186 case OMPD_declare_mapper: 9187 case OMPD_taskloop: 9188 case OMPD_taskloop_simd: 9189 case OMPD_target: 9190 case OMPD_target_simd: 9191 case OMPD_target_teams_distribute: 9192 case OMPD_target_teams_distribute_simd: 9193 case OMPD_target_teams_distribute_parallel_for: 9194 case OMPD_target_teams_distribute_parallel_for_simd: 9195 case OMPD_target_teams: 9196 case OMPD_target_parallel: 9197 case OMPD_target_parallel_for: 9198 case OMPD_target_parallel_for_simd: 9199 case OMPD_requires: 9200 case OMPD_unknown: 9201 
llvm_unreachable("Unexpected standalone target data directive."); 9202 break; 9203 } 9204 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); 9205 }; 9206 9207 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 9208 CodeGenFunction &CGF, PrePostActionTy &) { 9209 // Fill up the arrays with all the mapped variables. 9210 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9211 MappableExprsHandler::MapValuesArrayTy Pointers; 9212 MappableExprsHandler::MapValuesArrayTy Sizes; 9213 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9214 9215 // Get map clause information. 9216 MappableExprsHandler MEHandler(D, CGF); 9217 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 9218 9219 TargetDataInfo Info; 9220 // Fill up the arrays and create the arguments. 9221 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 9222 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 9223 Info.PointersArray, Info.SizesArray, 9224 Info.MapTypesArray, Info); 9225 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 9226 InputInfo.BasePointersArray = 9227 Address(Info.BasePointersArray, CGM.getPointerAlign()); 9228 InputInfo.PointersArray = 9229 Address(Info.PointersArray, CGM.getPointerAlign()); 9230 InputInfo.SizesArray = 9231 Address(Info.SizesArray, CGM.getPointerAlign()); 9232 MapTypesArray = Info.MapTypesArray; 9233 if (D.hasClausesOfKind<OMPDependClause>()) 9234 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 9235 else 9236 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 9237 }; 9238 9239 if (IfCond) { 9240 emitOMPIfClause(CGF, IfCond, TargetThenGen, 9241 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 9242 } else { 9243 RegionCodeGenTy ThenRCG(TargetThenGen); 9244 ThenRCG(CGF); 9245 } 9246 } 9247 9248 namespace { 9249 /// Kind of parameter in a function with 'declare simd' directive. 
9250 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 9251 /// Attribute set of the parameter. 9252 struct ParamAttrTy { 9253 ParamKindTy Kind = Vector; 9254 llvm::APSInt StrideOrArg; 9255 llvm::APSInt Alignment; 9256 }; 9257 } // namespace 9258 9259 static unsigned evaluateCDTSize(const FunctionDecl *FD, 9260 ArrayRef<ParamAttrTy> ParamAttrs) { 9261 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 9262 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 9263 // of that clause. The VLEN value must be power of 2. 9264 // In other case the notion of the function`s "characteristic data type" (CDT) 9265 // is used to compute the vector length. 9266 // CDT is defined in the following order: 9267 // a) For non-void function, the CDT is the return type. 9268 // b) If the function has any non-uniform, non-linear parameters, then the 9269 // CDT is the type of the first such parameter. 9270 // c) If the CDT determined by a) or b) above is struct, union, or class 9271 // type which is pass-by-value (except for the type that maps to the 9272 // built-in complex data type), the characteristic data type is int. 9273 // d) If none of the above three cases is applicable, the CDT is int. 9274 // The VLEN is then determined based on the CDT and the size of vector 9275 // register of that ISA for which current vector version is generated. The 9276 // VLEN is computed using the formula below: 9277 // VLEN = sizeof(vector_register) / sizeof(CDT), 9278 // where vector register size specified in section 3.2.1 Registers and the 9279 // Stack Frame of original AMD64 ABI document. 
9280 QualType RetType = FD->getReturnType(); 9281 if (RetType.isNull()) 9282 return 0; 9283 ASTContext &C = FD->getASTContext(); 9284 QualType CDT; 9285 if (!RetType.isNull() && !RetType->isVoidType()) { 9286 CDT = RetType; 9287 } else { 9288 unsigned Offset = 0; 9289 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 9290 if (ParamAttrs[Offset].Kind == Vector) 9291 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 9292 ++Offset; 9293 } 9294 if (CDT.isNull()) { 9295 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 9296 if (ParamAttrs[I + Offset].Kind == Vector) { 9297 CDT = FD->getParamDecl(I)->getType(); 9298 break; 9299 } 9300 } 9301 } 9302 } 9303 if (CDT.isNull()) 9304 CDT = C.IntTy; 9305 CDT = CDT->getCanonicalTypeUnqualified(); 9306 if (CDT->isRecordType() || CDT->isUnionType()) 9307 CDT = C.IntTy; 9308 return C.getTypeSize(CDT); 9309 } 9310 9311 static void 9312 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 9313 const llvm::APSInt &VLENVal, 9314 ArrayRef<ParamAttrTy> ParamAttrs, 9315 OMPDeclareSimdDeclAttr::BranchStateTy State) { 9316 struct ISADataTy { 9317 char ISA; 9318 unsigned VecRegSize; 9319 }; 9320 ISADataTy ISAData[] = { 9321 { 9322 'b', 128 9323 }, // SSE 9324 { 9325 'c', 256 9326 }, // AVX 9327 { 9328 'd', 256 9329 }, // AVX2 9330 { 9331 'e', 512 9332 }, // AVX512 9333 }; 9334 llvm::SmallVector<char, 2> Masked; 9335 switch (State) { 9336 case OMPDeclareSimdDeclAttr::BS_Undefined: 9337 Masked.push_back('N'); 9338 Masked.push_back('M'); 9339 break; 9340 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 9341 Masked.push_back('N'); 9342 break; 9343 case OMPDeclareSimdDeclAttr::BS_Inbranch: 9344 Masked.push_back('M'); 9345 break; 9346 } 9347 for (char Mask : Masked) { 9348 for (const ISADataTy &Data : ISAData) { 9349 SmallString<256> Buffer; 9350 llvm::raw_svector_ostream Out(Buffer); 9351 Out << "_ZGV" << Data.ISA << Mask; 9352 if (!VLENVal) { 9353 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / 9354 
evaluateCDTSize(FD, ParamAttrs)); 9355 } else { 9356 Out << VLENVal; 9357 } 9358 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 9359 switch (ParamAttr.Kind){ 9360 case LinearWithVarStride: 9361 Out << 's' << ParamAttr.StrideOrArg; 9362 break; 9363 case Linear: 9364 Out << 'l'; 9365 if (!!ParamAttr.StrideOrArg) 9366 Out << ParamAttr.StrideOrArg; 9367 break; 9368 case Uniform: 9369 Out << 'u'; 9370 break; 9371 case Vector: 9372 Out << 'v'; 9373 break; 9374 } 9375 if (!!ParamAttr.Alignment) 9376 Out << 'a' << ParamAttr.Alignment; 9377 } 9378 Out << '_' << Fn->getName(); 9379 Fn->addFnAttr(Out.str()); 9380 } 9381 } 9382 } 9383 9384 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 9385 llvm::Function *Fn) { 9386 ASTContext &C = CGM.getContext(); 9387 FD = FD->getMostRecentDecl(); 9388 // Map params to their positions in function decl. 9389 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 9390 if (isa<CXXMethodDecl>(FD)) 9391 ParamPositions.try_emplace(FD, 0); 9392 unsigned ParamPos = ParamPositions.size(); 9393 for (const ParmVarDecl *P : FD->parameters()) { 9394 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 9395 ++ParamPos; 9396 } 9397 while (FD) { 9398 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 9399 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 9400 // Mark uniform parameters. 9401 for (const Expr *E : Attr->uniforms()) { 9402 E = E->IgnoreParenImpCasts(); 9403 unsigned Pos; 9404 if (isa<CXXThisExpr>(E)) { 9405 Pos = ParamPositions[FD]; 9406 } else { 9407 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 9408 ->getCanonicalDecl(); 9409 Pos = ParamPositions[PVD]; 9410 } 9411 ParamAttrs[Pos].Kind = Uniform; 9412 } 9413 // Get alignment info. 
9414 auto NI = Attr->alignments_begin(); 9415 for (const Expr *E : Attr->aligneds()) { 9416 E = E->IgnoreParenImpCasts(); 9417 unsigned Pos; 9418 QualType ParmTy; 9419 if (isa<CXXThisExpr>(E)) { 9420 Pos = ParamPositions[FD]; 9421 ParmTy = E->getType(); 9422 } else { 9423 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 9424 ->getCanonicalDecl(); 9425 Pos = ParamPositions[PVD]; 9426 ParmTy = PVD->getType(); 9427 } 9428 ParamAttrs[Pos].Alignment = 9429 (*NI) 9430 ? (*NI)->EvaluateKnownConstInt(C) 9431 : llvm::APSInt::getUnsigned( 9432 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 9433 .getQuantity()); 9434 ++NI; 9435 } 9436 // Mark linear parameters. 9437 auto SI = Attr->steps_begin(); 9438 auto MI = Attr->modifiers_begin(); 9439 for (const Expr *E : Attr->linears()) { 9440 E = E->IgnoreParenImpCasts(); 9441 unsigned Pos; 9442 if (isa<CXXThisExpr>(E)) { 9443 Pos = ParamPositions[FD]; 9444 } else { 9445 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 9446 ->getCanonicalDecl(); 9447 Pos = ParamPositions[PVD]; 9448 } 9449 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 9450 ParamAttr.Kind = Linear; 9451 if (*SI) { 9452 Expr::EvalResult Result; 9453 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 9454 if (const auto *DRE = 9455 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 9456 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 9457 ParamAttr.Kind = LinearWithVarStride; 9458 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 9459 ParamPositions[StridePVD->getCanonicalDecl()]); 9460 } 9461 } 9462 } else { 9463 ParamAttr.StrideOrArg = Result.Val.getInt(); 9464 } 9465 } 9466 ++SI; 9467 ++MI; 9468 } 9469 llvm::APSInt VLENVal; 9470 if (const Expr *VLEN = Attr->getSimdlen()) 9471 VLENVal = VLEN->EvaluateKnownConstInt(C); 9472 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 9473 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 9474 CGM.getTriple().getArch() == 
llvm::Triple::x86_64) 9475 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 9476 } 9477 FD = FD->getPreviousDecl(); 9478 } 9479 } 9480 9481 namespace { 9482 /// Cleanup action for doacross support. 9483 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 9484 public: 9485 static const int DoacrossFinArgs = 2; 9486 9487 private: 9488 llvm::FunctionCallee RTLFn; 9489 llvm::Value *Args[DoacrossFinArgs]; 9490 9491 public: 9492 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 9493 ArrayRef<llvm::Value *> CallArgs) 9494 : RTLFn(RTLFn) { 9495 assert(CallArgs.size() == DoacrossFinArgs); 9496 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 9497 } 9498 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 9499 if (!CGF.HaveInsertPoint()) 9500 return; 9501 CGF.EmitRuntimeCall(RTLFn, Args); 9502 } 9503 }; 9504 } // namespace 9505 9506 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 9507 const OMPLoopDirective &D, 9508 ArrayRef<Expr *> NumIterations) { 9509 if (!CGF.HaveInsertPoint()) 9510 return; 9511 9512 ASTContext &C = CGM.getContext(); 9513 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 9514 RecordDecl *RD; 9515 if (KmpDimTy.isNull()) { 9516 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 9517 // kmp_int64 lo; // lower 9518 // kmp_int64 up; // upper 9519 // kmp_int64 st; // stride 9520 // }; 9521 RD = C.buildImplicitRecord("kmp_dim"); 9522 RD->startDefinition(); 9523 addFieldToRecordDecl(C, RD, Int64Ty); 9524 addFieldToRecordDecl(C, RD, Int64Ty); 9525 addFieldToRecordDecl(C, RD, Int64Ty); 9526 RD->completeDefinition(); 9527 KmpDimTy = C.getRecordType(RD); 9528 } else { 9529 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 9530 } 9531 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 9532 QualType ArrayTy = 9533 C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0); 9534 9535 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 9536 
CGF.EmitNullInitialization(DimsAddr, ArrayTy); 9537 enum { LowerFD = 0, UpperFD, StrideFD }; 9538 // Fill dims with data. 9539 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 9540 LValue DimsLVal = CGF.MakeAddrLValue( 9541 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 9542 // dims.upper = num_iterations; 9543 LValue UpperLVal = CGF.EmitLValueForField( 9544 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 9545 llvm::Value *NumIterVal = 9546 CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]), 9547 D.getNumIterations()->getType(), Int64Ty, 9548 D.getNumIterations()->getExprLoc()); 9549 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 9550 // dims.stride = 1; 9551 LValue StrideLVal = CGF.EmitLValueForField( 9552 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 9553 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 9554 StrideLVal); 9555 } 9556 9557 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 9558 // kmp_int32 num_dims, struct kmp_dim * dims); 9559 llvm::Value *Args[] = { 9560 emitUpdateLocation(CGF, D.getBeginLoc()), 9561 getThreadID(CGF, D.getBeginLoc()), 9562 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 9563 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9564 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 9565 CGM.VoidPtrTy)}; 9566 9567 llvm::FunctionCallee RTLFn = 9568 createRuntimeFunction(OMPRTL__kmpc_doacross_init); 9569 CGF.EmitRuntimeCall(RTLFn, Args); 9570 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 9571 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 9572 llvm::FunctionCallee FiniRTLFn = 9573 createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 9574 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 9575 llvm::makeArrayRef(FiniArgs)); 9576 } 9577 9578 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 9579 const OMPDependClause *C) { 9580 QualType Int64Ty = 9581 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 9582 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 9583 QualType ArrayTy = CGM.getContext().getConstantArrayType( 9584 Int64Ty, Size, ArrayType::Normal, 0); 9585 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 9586 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 9587 const Expr *CounterVal = C->getLoopData(I); 9588 assert(CounterVal); 9589 llvm::Value *CntVal = CGF.EmitScalarConversion( 9590 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 9591 CounterVal->getExprLoc()); 9592 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 9593 /*Volatile=*/false, Int64Ty); 9594 } 9595 llvm::Value *Args[] = { 9596 emitUpdateLocation(CGF, C->getBeginLoc()), 9597 getThreadID(CGF, C->getBeginLoc()), 9598 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 9599 llvm::FunctionCallee RTLFn; 9600 if (C->getDependencyKind() == OMPC_DEPEND_source) { 9601 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 9602 } else { 9603 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 9604 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 9605 } 9606 CGF.EmitRuntimeCall(RTLFn, Args); 9607 } 9608 9609 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 9610 llvm::FunctionCallee Callee, 9611 ArrayRef<llvm::Value *> Args) const { 9612 assert(Loc.isValid() && "Outlined function call location must be valid."); 9613 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 9614 9615 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 9616 if (Fn->doesNotThrow()) { 9617 CGF.EmitNounwindRuntimeCall(Fn, Args); 9618 return; 9619 } 9620 } 9621 CGF.EmitRuntimeCall(Callee, Args); 9622 } 9623 9624 void CGOpenMPRuntime::emitOutlinedFunctionCall( 9625 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 9626 ArrayRef<llvm::Value *> Args) const { 9627 emitCall(CGF, Loc, OutlinedFn, Args); 9628 } 
9629 9630 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 9631 const VarDecl *NativeParam, 9632 const VarDecl *TargetParam) const { 9633 return CGF.GetAddrOfLocalVar(NativeParam); 9634 } 9635 9636 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 9637 const VarDecl *VD) { 9638 return Address::invalid(); 9639 } 9640 9641 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 9642 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 9643 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 9644 llvm_unreachable("Not supported in SIMD-only mode"); 9645 } 9646 9647 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 9648 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 9649 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 9650 llvm_unreachable("Not supported in SIMD-only mode"); 9651 } 9652 9653 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 9654 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 9655 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 9656 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 9657 bool Tied, unsigned &NumberOfParts) { 9658 llvm_unreachable("Not supported in SIMD-only mode"); 9659 } 9660 9661 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 9662 SourceLocation Loc, 9663 llvm::Function *OutlinedFn, 9664 ArrayRef<llvm::Value *> CapturedVars, 9665 const Expr *IfCond) { 9666 llvm_unreachable("Not supported in SIMD-only mode"); 9667 } 9668 9669 void CGOpenMPSIMDRuntime::emitCriticalRegion( 9670 CodeGenFunction &CGF, StringRef CriticalName, 9671 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 9672 const Expr *Hint) { 9673 llvm_unreachable("Not supported in SIMD-only mode"); 9674 } 9675 9676 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 9677 const RegionCodeGenTy &MasterOpGen, 9678 SourceLocation Loc) { 9679 llvm_unreachable("Not supported 
in SIMD-only mode"); 9680 } 9681 9682 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 9683 SourceLocation Loc) { 9684 llvm_unreachable("Not supported in SIMD-only mode"); 9685 } 9686 9687 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 9688 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 9689 SourceLocation Loc) { 9690 llvm_unreachable("Not supported in SIMD-only mode"); 9691 } 9692 9693 void CGOpenMPSIMDRuntime::emitSingleRegion( 9694 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 9695 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 9696 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 9697 ArrayRef<const Expr *> AssignmentOps) { 9698 llvm_unreachable("Not supported in SIMD-only mode"); 9699 } 9700 9701 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 9702 const RegionCodeGenTy &OrderedOpGen, 9703 SourceLocation Loc, 9704 bool IsThreads) { 9705 llvm_unreachable("Not supported in SIMD-only mode"); 9706 } 9707 9708 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 9709 SourceLocation Loc, 9710 OpenMPDirectiveKind Kind, 9711 bool EmitChecks, 9712 bool ForceSimpleCall) { 9713 llvm_unreachable("Not supported in SIMD-only mode"); 9714 } 9715 9716 void CGOpenMPSIMDRuntime::emitForDispatchInit( 9717 CodeGenFunction &CGF, SourceLocation Loc, 9718 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 9719 bool Ordered, const DispatchRTInput &DispatchValues) { 9720 llvm_unreachable("Not supported in SIMD-only mode"); 9721 } 9722 9723 void CGOpenMPSIMDRuntime::emitForStaticInit( 9724 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 9725 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 9726 llvm_unreachable("Not supported in SIMD-only mode"); 9727 } 9728 9729 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 9730 CodeGenFunction &CGF, SourceLocation Loc, 9731 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput 
&Values) { 9732 llvm_unreachable("Not supported in SIMD-only mode"); 9733 } 9734 9735 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 9736 SourceLocation Loc, 9737 unsigned IVSize, 9738 bool IVSigned) { 9739 llvm_unreachable("Not supported in SIMD-only mode"); 9740 } 9741 9742 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 9743 SourceLocation Loc, 9744 OpenMPDirectiveKind DKind) { 9745 llvm_unreachable("Not supported in SIMD-only mode"); 9746 } 9747 9748 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 9749 SourceLocation Loc, 9750 unsigned IVSize, bool IVSigned, 9751 Address IL, Address LB, 9752 Address UB, Address ST) { 9753 llvm_unreachable("Not supported in SIMD-only mode"); 9754 } 9755 9756 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 9757 llvm::Value *NumThreads, 9758 SourceLocation Loc) { 9759 llvm_unreachable("Not supported in SIMD-only mode"); 9760 } 9761 9762 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 9763 OpenMPProcBindClauseKind ProcBind, 9764 SourceLocation Loc) { 9765 llvm_unreachable("Not supported in SIMD-only mode"); 9766 } 9767 9768 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 9769 const VarDecl *VD, 9770 Address VDAddr, 9771 SourceLocation Loc) { 9772 llvm_unreachable("Not supported in SIMD-only mode"); 9773 } 9774 9775 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 9776 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 9777 CodeGenFunction *CGF) { 9778 llvm_unreachable("Not supported in SIMD-only mode"); 9779 } 9780 9781 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 9782 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 9783 llvm_unreachable("Not supported in SIMD-only mode"); 9784 } 9785 9786 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 9787 ArrayRef<const Expr *> Vars, 9788 SourceLocation Loc) { 9789 
llvm_unreachable("Not supported in SIMD-only mode"); 9790 } 9791 9792 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 9793 const OMPExecutableDirective &D, 9794 llvm::Function *TaskFunction, 9795 QualType SharedsTy, Address Shareds, 9796 const Expr *IfCond, 9797 const OMPTaskDataTy &Data) { 9798 llvm_unreachable("Not supported in SIMD-only mode"); 9799 } 9800 9801 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 9802 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 9803 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 9804 const Expr *IfCond, const OMPTaskDataTy &Data) { 9805 llvm_unreachable("Not supported in SIMD-only mode"); 9806 } 9807 9808 void CGOpenMPSIMDRuntime::emitReduction( 9809 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 9810 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 9811 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 9812 assert(Options.SimpleReduction && "Only simple reduction is expected."); 9813 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 9814 ReductionOps, Options); 9815 } 9816 9817 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 9818 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 9819 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 9820 llvm_unreachable("Not supported in SIMD-only mode"); 9821 } 9822 9823 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 9824 SourceLocation Loc, 9825 ReductionCodeGen &RCG, 9826 unsigned N) { 9827 llvm_unreachable("Not supported in SIMD-only mode"); 9828 } 9829 9830 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 9831 SourceLocation Loc, 9832 llvm::Value *ReductionsPtr, 9833 LValue SharedLVal) { 9834 llvm_unreachable("Not supported in SIMD-only mode"); 9835 } 9836 9837 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 9838 SourceLocation 
Loc) { 9839 llvm_unreachable("Not supported in SIMD-only mode"); 9840 } 9841 9842 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 9843 CodeGenFunction &CGF, SourceLocation Loc, 9844 OpenMPDirectiveKind CancelRegion) { 9845 llvm_unreachable("Not supported in SIMD-only mode"); 9846 } 9847 9848 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 9849 SourceLocation Loc, const Expr *IfCond, 9850 OpenMPDirectiveKind CancelRegion) { 9851 llvm_unreachable("Not supported in SIMD-only mode"); 9852 } 9853 9854 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 9855 const OMPExecutableDirective &D, StringRef ParentName, 9856 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 9857 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 9858 llvm_unreachable("Not supported in SIMD-only mode"); 9859 } 9860 9861 void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF, 9862 const OMPExecutableDirective &D, 9863 llvm::Function *OutlinedFn, 9864 llvm::Value *OutlinedFnID, 9865 const Expr *IfCond, 9866 const Expr *Device) { 9867 llvm_unreachable("Not supported in SIMD-only mode"); 9868 } 9869 9870 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 9871 llvm_unreachable("Not supported in SIMD-only mode"); 9872 } 9873 9874 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 9875 llvm_unreachable("Not supported in SIMD-only mode"); 9876 } 9877 9878 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 9879 return false; 9880 } 9881 9882 llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() { 9883 return nullptr; 9884 } 9885 9886 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 9887 const OMPExecutableDirective &D, 9888 SourceLocation Loc, 9889 llvm::Function *OutlinedFn, 9890 ArrayRef<llvm::Value *> CapturedVars) { 9891 llvm_unreachable("Not supported in SIMD-only mode"); 9892 } 9893 9894 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 9895 const Expr *NumTeams, 9896 const Expr 
*ThreadLimit, 9897 SourceLocation Loc) { 9898 llvm_unreachable("Not supported in SIMD-only mode"); 9899 } 9900 9901 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 9902 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 9903 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 9904 llvm_unreachable("Not supported in SIMD-only mode"); 9905 } 9906 9907 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 9908 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 9909 const Expr *Device) { 9910 llvm_unreachable("Not supported in SIMD-only mode"); 9911 } 9912 9913 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 9914 const OMPLoopDirective &D, 9915 ArrayRef<Expr *> NumIterations) { 9916 llvm_unreachable("Not supported in SIMD-only mode"); 9917 } 9918 9919 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 9920 const OMPDependClause *C) { 9921 llvm_unreachable("Not supported in SIMD-only mode"); 9922 } 9923 9924 const VarDecl * 9925 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 9926 const VarDecl *NativeParam) const { 9927 llvm_unreachable("Not supported in SIMD-only mode"); 9928 } 9929 9930 Address 9931 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 9932 const VarDecl *NativeParam, 9933 const VarDecl *TargetParam) const { 9934 llvm_unreachable("Not supported in SIMD-only mode"); 9935 } 9936