//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "ConstantBuilder.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;

namespace {
/// \brief Base class for handling code generation inside OpenMP regions.
///
/// Every object of this hierarchy is a CGCapturedStmtInfo of kind CR_OpenMP;
/// the concrete subclass is identified by the CGOpenMPRegionKind stored at
/// construction time.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// \brief Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// \brief Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// \brief Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// \brief Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// \brief Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// \brief Creates region info bound to the captured statement \p CS.
  /// \param RegionKind Kind tag later reported by getRegionKind().
  /// \param CodeGen Callback invoked by EmitBody() to emit the region body.
  /// \param Kind Directive kind later reported by getDirectiveKind().
  /// \param HasCancel Flag later reported by hasCancel().
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// \brief Creates region info that is not bound to a captured statement.
  /// The parameters have the same meaning as in the constructor above.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// \brief Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// \brief Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// \brief Hook for emitting a task switching point. The base implementation
  /// does nothing; subclasses override it where switching points are needed.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// \brief Get the region kind this object was constructed with.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// \brief Get the directive kind this object was constructed with.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// \brief Get the HasCancel flag this object was constructed with.
  bool hasCancel() const { return HasCancel; }

  /// \brief Support for isa/cast/dyn_cast: matches any captured statement info
  /// whose kind is CR_OpenMP.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of the region; used by the subclasses' classof().
  CGOpenMPRegionKind RegionKind;
  /// Code generation callback for the region body (see EmitBody()).
  RegionCodeGenTy CodeGen;
  /// Directive kind supplied at construction.
  OpenMPDirectiveKind Kind;
  /// Flag supplied at construction and exposed through hasCancel().
  bool HasCancel;
};

/// \brief API for captured statement code generation in OpenMP constructs.
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 99 public: 100 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 101 const RegionCodeGenTy &CodeGen, 102 OpenMPDirectiveKind Kind, bool HasCancel) 103 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 104 HasCancel), 105 ThreadIDVar(ThreadIDVar) { 106 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 107 } 108 109 /// \brief Get a variable or parameter for storing global thread id 110 /// inside OpenMP construct. 111 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 112 113 /// \brief Get the name of the capture helper. 114 StringRef getHelperName() const override { return ".omp_outlined."; } 115 116 static bool classof(const CGCapturedStmtInfo *Info) { 117 return CGOpenMPRegionInfo::classof(Info) && 118 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 119 ParallelOutlinedRegion; 120 } 121 122 private: 123 /// \brief A variable or parameter storing global thread id for OpenMP 124 /// constructs. 125 const VarDecl *ThreadIDVar; 126 }; 127 128 /// \brief API for captured statement code generation in OpenMP constructs. 129 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 130 public: 131 class UntiedTaskActionTy final : public PrePostActionTy { 132 bool Untied; 133 const VarDecl *PartIDVar; 134 const RegionCodeGenTy UntiedCodeGen; 135 llvm::SwitchInst *UntiedSwitch = nullptr; 136 137 public: 138 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 139 const RegionCodeGenTy &UntiedCodeGen) 140 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 141 void Enter(CodeGenFunction &CGF) override { 142 if (Untied) { 143 // Emit task switching point. 
144 auto PartIdLVal = CGF.EmitLoadOfPointerLValue( 145 CGF.GetAddrOfLocalVar(PartIDVar), 146 PartIDVar->getType()->castAs<PointerType>()); 147 auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation()); 148 auto *DoneBB = CGF.createBasicBlock(".untied.done."); 149 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 150 CGF.EmitBlock(DoneBB); 151 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 152 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 153 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 154 CGF.Builder.GetInsertBlock()); 155 emitUntiedSwitch(CGF); 156 } 157 } 158 void emitUntiedSwitch(CodeGenFunction &CGF) const { 159 if (Untied) { 160 auto PartIdLVal = CGF.EmitLoadOfPointerLValue( 161 CGF.GetAddrOfLocalVar(PartIDVar), 162 PartIDVar->getType()->castAs<PointerType>()); 163 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 164 PartIdLVal); 165 UntiedCodeGen(CGF); 166 CodeGenFunction::JumpDest CurPoint = 167 CGF.getJumpDestInCurrentScope(".untied.next."); 168 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 169 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 170 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 171 CGF.Builder.GetInsertBlock()); 172 CGF.EmitBranchThroughCleanup(CurPoint); 173 CGF.EmitBlock(CurPoint.getBlock()); 174 } 175 } 176 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } 177 }; 178 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 179 const VarDecl *ThreadIDVar, 180 const RegionCodeGenTy &CodeGen, 181 OpenMPDirectiveKind Kind, bool HasCancel, 182 const UntiedTaskActionTy &Action) 183 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 184 ThreadIDVar(ThreadIDVar), Action(Action) { 185 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 186 } 187 188 /// \brief Get a variable or parameter for storing global thread id 189 /// inside OpenMP construct. 
190 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 191 192 /// \brief Get an LValue for the current ThreadID variable. 193 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 194 195 /// \brief Get the name of the capture helper. 196 StringRef getHelperName() const override { return ".omp_outlined."; } 197 198 void emitUntiedSwitch(CodeGenFunction &CGF) override { 199 Action.emitUntiedSwitch(CGF); 200 } 201 202 static bool classof(const CGCapturedStmtInfo *Info) { 203 return CGOpenMPRegionInfo::classof(Info) && 204 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 205 TaskOutlinedRegion; 206 } 207 208 private: 209 /// \brief A variable or parameter storing global thread id for OpenMP 210 /// constructs. 211 const VarDecl *ThreadIDVar; 212 /// Action for emitting code for untied tasks. 213 const UntiedTaskActionTy &Action; 214 }; 215 216 /// \brief API for inlined captured statement code generation in OpenMP 217 /// constructs. 218 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 219 public: 220 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 221 const RegionCodeGenTy &CodeGen, 222 OpenMPDirectiveKind Kind, bool HasCancel) 223 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 224 OldCSI(OldCSI), 225 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 226 227 // \brief Retrieve the value of the context parameter. 228 llvm::Value *getContextValue() const override { 229 if (OuterRegionInfo) 230 return OuterRegionInfo->getContextValue(); 231 llvm_unreachable("No context value for inlined OpenMP region"); 232 } 233 234 void setContextValue(llvm::Value *V) override { 235 if (OuterRegionInfo) { 236 OuterRegionInfo->setContextValue(V); 237 return; 238 } 239 llvm_unreachable("No context value for inlined OpenMP region"); 240 } 241 242 /// \brief Lookup the captured field decl for a variable. 
243 const FieldDecl *lookup(const VarDecl *VD) const override { 244 if (OuterRegionInfo) 245 return OuterRegionInfo->lookup(VD); 246 // If there is no outer outlined region,no need to lookup in a list of 247 // captured variables, we can use the original one. 248 return nullptr; 249 } 250 251 FieldDecl *getThisFieldDecl() const override { 252 if (OuterRegionInfo) 253 return OuterRegionInfo->getThisFieldDecl(); 254 return nullptr; 255 } 256 257 /// \brief Get a variable or parameter for storing global thread id 258 /// inside OpenMP construct. 259 const VarDecl *getThreadIDVariable() const override { 260 if (OuterRegionInfo) 261 return OuterRegionInfo->getThreadIDVariable(); 262 return nullptr; 263 } 264 265 /// \brief Get the name of the capture helper. 266 StringRef getHelperName() const override { 267 if (auto *OuterRegionInfo = getOldCSI()) 268 return OuterRegionInfo->getHelperName(); 269 llvm_unreachable("No helper name for inlined OpenMP construct"); 270 } 271 272 void emitUntiedSwitch(CodeGenFunction &CGF) override { 273 if (OuterRegionInfo) 274 OuterRegionInfo->emitUntiedSwitch(CGF); 275 } 276 277 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 278 279 static bool classof(const CGCapturedStmtInfo *Info) { 280 return CGOpenMPRegionInfo::classof(Info) && 281 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 282 } 283 284 ~CGOpenMPInlinedRegionInfo() override = default; 285 286 private: 287 /// \brief CodeGen info about outer OpenMP region. 288 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 289 CGOpenMPRegionInfo *OuterRegionInfo; 290 }; 291 292 /// \brief API for captured statement code generation in OpenMP target 293 /// constructs. For this captures, implicit parameters are used instead of the 294 /// captured fields. The name of the target region has to be unique in a given 295 /// application so it is provided by the client, because only the client has 296 /// the information to generate that. 
297 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 298 public: 299 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 300 const RegionCodeGenTy &CodeGen, StringRef HelperName) 301 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 302 /*HasCancel=*/false), 303 HelperName(HelperName) {} 304 305 /// \brief This is unused for target regions because each starts executing 306 /// with a single thread. 307 const VarDecl *getThreadIDVariable() const override { return nullptr; } 308 309 /// \brief Get the name of the capture helper. 310 StringRef getHelperName() const override { return HelperName; } 311 312 static bool classof(const CGCapturedStmtInfo *Info) { 313 return CGOpenMPRegionInfo::classof(Info) && 314 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 315 } 316 317 private: 318 StringRef HelperName; 319 }; 320 321 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 322 llvm_unreachable("No codegen for expressions"); 323 } 324 /// \brief API for generation of expressions captured in a innermost OpenMP 325 /// region. 326 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 327 public: 328 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 329 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 330 OMPD_unknown, 331 /*HasCancel=*/false), 332 PrivScope(CGF) { 333 // Make sure the globals captured in the provided statement are local by 334 // using the privatization logic. We assume the same variable is not 335 // captured more than once. 
336 for (auto &C : CS.captures()) { 337 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 338 continue; 339 340 const VarDecl *VD = C.getCapturedVar(); 341 if (VD->isLocalVarDeclOrParm()) 342 continue; 343 344 DeclRefExpr DRE(const_cast<VarDecl *>(VD), 345 /*RefersToEnclosingVariableOrCapture=*/false, 346 VD->getType().getNonReferenceType(), VK_LValue, 347 SourceLocation()); 348 PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address { 349 return CGF.EmitLValue(&DRE).getAddress(); 350 }); 351 } 352 (void)PrivScope.Privatize(); 353 } 354 355 /// \brief Lookup the captured field decl for a variable. 356 const FieldDecl *lookup(const VarDecl *VD) const override { 357 if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 358 return FD; 359 return nullptr; 360 } 361 362 /// \brief Emit the captured statement body. 363 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 364 llvm_unreachable("No body for expressions"); 365 } 366 367 /// \brief Get a variable or parameter for storing global thread id 368 /// inside OpenMP construct. 369 const VarDecl *getThreadIDVariable() const override { 370 llvm_unreachable("No thread id for expressions"); 371 } 372 373 /// \brief Get the name of the capture helper. 374 StringRef getHelperName() const override { 375 llvm_unreachable("No helper name for expressions"); 376 } 377 378 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 379 380 private: 381 /// Private scope to capture global variables. 382 CodeGenFunction::OMPPrivateScope PrivScope; 383 }; 384 385 /// \brief RAII for emitting code of OpenMP constructs. 386 class InlinedOpenMPRegionRAII { 387 CodeGenFunction &CGF; 388 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 389 FieldDecl *LambdaThisCaptureField = nullptr; 390 391 public: 392 /// \brief Constructs region for combined constructs. 393 /// \param CodeGen Code generation sequence for combined directives. 
Includes 394 /// a list of functions used for code generation of implicitly inlined 395 /// regions. 396 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 397 OpenMPDirectiveKind Kind, bool HasCancel) 398 : CGF(CGF) { 399 // Start emission for the construct. 400 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 401 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 402 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 403 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 404 CGF.LambdaThisCaptureField = nullptr; 405 } 406 407 ~InlinedOpenMPRegionRAII() { 408 // Restore original CapturedStmtInfo only if we're done with code emission. 409 auto *OldCSI = 410 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 411 delete CGF.CapturedStmtInfo; 412 CGF.CapturedStmtInfo = OldCSI; 413 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 414 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 415 } 416 }; 417 418 /// \brief Values for bit flags used in the ident_t to describe the fields. 419 /// All enumeric elements are named and described in accordance with the code 420 /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h 421 enum OpenMPLocationFlags { 422 /// \brief Use trampoline for internal microtask. 423 OMP_IDENT_IMD = 0x01, 424 /// \brief Use c-style ident structure. 425 OMP_IDENT_KMPC = 0x02, 426 /// \brief Atomic reduction option for kmpc_reduce. 427 OMP_ATOMIC_REDUCE = 0x10, 428 /// \brief Explicit 'barrier' directive. 429 OMP_IDENT_BARRIER_EXPL = 0x20, 430 /// \brief Implicit barrier in code. 431 OMP_IDENT_BARRIER_IMPL = 0x40, 432 /// \brief Implicit barrier in 'for' directive. 433 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 434 /// \brief Implicit barrier in 'sections' directive. 435 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 436 /// \brief Implicit barrier in 'single' directive. 
437 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140 438 }; 439 440 /// \brief Describes ident structure that describes a source location. 441 /// All descriptions are taken from 442 /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h 443 /// Original structure: 444 /// typedef struct ident { 445 /// kmp_int32 reserved_1; /**< might be used in Fortran; 446 /// see above */ 447 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 448 /// KMP_IDENT_KMPC identifies this union 449 /// member */ 450 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 451 /// see above */ 452 ///#if USE_ITT_BUILD 453 /// /* but currently used for storing 454 /// region-specific ITT */ 455 /// /* contextual information. */ 456 ///#endif /* USE_ITT_BUILD */ 457 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 458 /// C++ */ 459 /// char const *psource; /**< String describing the source location. 460 /// The string is composed of semi-colon separated 461 // fields which describe the source file, 462 /// the function and a pair of line numbers that 463 /// delimit the construct. 464 /// */ 465 /// } ident_t; 466 enum IdentFieldIndex { 467 /// \brief might be used in Fortran 468 IdentField_Reserved_1, 469 /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 470 IdentField_Flags, 471 /// \brief Not really used in Fortran any more 472 IdentField_Reserved_2, 473 /// \brief Source[4] in Fortran, do not use for C++ 474 IdentField_Reserved_3, 475 /// \brief String describing the source location. The string is composed of 476 /// semi-colon separated fields which describe the source file, the function 477 /// and a pair of line numbers that delimit the construct. 478 IdentField_PSource 479 }; 480 481 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from 482 /// the enum sched_type in kmp.h). 483 enum OpenMPSchedType { 484 /// \brief Lower bound for default (unordered) versions. 
485 OMP_sch_lower = 32, 486 OMP_sch_static_chunked = 33, 487 OMP_sch_static = 34, 488 OMP_sch_dynamic_chunked = 35, 489 OMP_sch_guided_chunked = 36, 490 OMP_sch_runtime = 37, 491 OMP_sch_auto = 38, 492 /// static with chunk adjustment (e.g., simd) 493 OMP_sch_static_balanced_chunked = 45, 494 /// \brief Lower bound for 'ordered' versions. 495 OMP_ord_lower = 64, 496 OMP_ord_static_chunked = 65, 497 OMP_ord_static = 66, 498 OMP_ord_dynamic_chunked = 67, 499 OMP_ord_guided_chunked = 68, 500 OMP_ord_runtime = 69, 501 OMP_ord_auto = 70, 502 OMP_sch_default = OMP_sch_static, 503 /// \brief dist_schedule types 504 OMP_dist_sch_static_chunked = 91, 505 OMP_dist_sch_static = 92, 506 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 507 /// Set if the monotonic schedule modifier was present. 508 OMP_sch_modifier_monotonic = (1 << 29), 509 /// Set if the nonmonotonic schedule modifier was present. 510 OMP_sch_modifier_nonmonotonic = (1 << 30), 511 }; 512 513 enum OpenMPRTLFunction { 514 /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, 515 /// kmpc_micro microtask, ...); 516 OMPRTL__kmpc_fork_call, 517 /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc, 518 /// kmp_int32 global_tid, void *data, size_t size, void ***cache); 519 OMPRTL__kmpc_threadprivate_cached, 520 /// \brief Call to void __kmpc_threadprivate_register( ident_t *, 521 /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 522 OMPRTL__kmpc_threadprivate_register, 523 // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); 524 OMPRTL__kmpc_global_thread_num, 525 // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 526 // kmp_critical_name *crit); 527 OMPRTL__kmpc_critical, 528 // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 529 // global_tid, kmp_critical_name *crit, uintptr_t hint); 530 OMPRTL__kmpc_critical_with_hint, 531 // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 532 // 
kmp_critical_name *crit); 533 OMPRTL__kmpc_end_critical, 534 // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 535 // global_tid); 536 OMPRTL__kmpc_cancel_barrier, 537 // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 538 OMPRTL__kmpc_barrier, 539 // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 540 OMPRTL__kmpc_for_static_fini, 541 // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 542 // global_tid); 543 OMPRTL__kmpc_serialized_parallel, 544 // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 545 // global_tid); 546 OMPRTL__kmpc_end_serialized_parallel, 547 // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 548 // kmp_int32 num_threads); 549 OMPRTL__kmpc_push_num_threads, 550 // Call to void __kmpc_flush(ident_t *loc); 551 OMPRTL__kmpc_flush, 552 // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid); 553 OMPRTL__kmpc_master, 554 // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); 555 OMPRTL__kmpc_end_master, 556 // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 557 // int end_part); 558 OMPRTL__kmpc_omp_taskyield, 559 // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); 560 OMPRTL__kmpc_single, 561 // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); 562 OMPRTL__kmpc_end_single, 563 // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 564 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 565 // kmp_routine_entry_t *task_entry); 566 OMPRTL__kmpc_omp_task_alloc, 567 // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * 568 // new_task); 569 OMPRTL__kmpc_omp_task, 570 // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 571 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 572 // kmp_int32 didit); 573 OMPRTL__kmpc_copyprivate, 574 // Call to kmp_int32 
__kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 575 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 576 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 577 OMPRTL__kmpc_reduce, 578 // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 579 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 580 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 581 // *lck); 582 OMPRTL__kmpc_reduce_nowait, 583 // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 584 // kmp_critical_name *lck); 585 OMPRTL__kmpc_end_reduce, 586 // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 587 // kmp_critical_name *lck); 588 OMPRTL__kmpc_end_reduce_nowait, 589 // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 590 // kmp_task_t * new_task); 591 OMPRTL__kmpc_omp_task_begin_if0, 592 // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 593 // kmp_task_t * new_task); 594 OMPRTL__kmpc_omp_task_complete_if0, 595 // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 596 OMPRTL__kmpc_ordered, 597 // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 598 OMPRTL__kmpc_end_ordered, 599 // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 600 // global_tid); 601 OMPRTL__kmpc_omp_taskwait, 602 // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 603 OMPRTL__kmpc_taskgroup, 604 // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 605 OMPRTL__kmpc_end_taskgroup, 606 // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 607 // int proc_bind); 608 OMPRTL__kmpc_push_proc_bind, 609 // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 610 // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t 611 // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 612 
OMPRTL__kmpc_omp_task_with_deps, 613 // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 614 // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 615 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 616 OMPRTL__kmpc_omp_wait_deps, 617 // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 618 // global_tid, kmp_int32 cncl_kind); 619 OMPRTL__kmpc_cancellationpoint, 620 // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 621 // kmp_int32 cncl_kind); 622 OMPRTL__kmpc_cancel, 623 // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 624 // kmp_int32 num_teams, kmp_int32 thread_limit); 625 OMPRTL__kmpc_push_num_teams, 626 // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 627 // microtask, ...); 628 OMPRTL__kmpc_fork_teams, 629 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 630 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 631 // sched, kmp_uint64 grainsize, void *task_dup); 632 OMPRTL__kmpc_taskloop, 633 // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 634 // num_dims, struct kmp_dim *dims); 635 OMPRTL__kmpc_doacross_init, 636 // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 637 OMPRTL__kmpc_doacross_fini, 638 // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 639 // *vec); 640 OMPRTL__kmpc_doacross_post, 641 // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 642 // *vec); 643 OMPRTL__kmpc_doacross_wait, 644 645 // 646 // Offloading related calls 647 // 648 // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t 649 // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t 650 // *arg_types); 651 OMPRTL__tgt_target, 652 // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, 653 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 654 // 
int32_t *arg_types, int32_t num_teams, int32_t thread_limit); 655 OMPRTL__tgt_target_teams, 656 // Call to void __tgt_register_lib(__tgt_bin_desc *desc); 657 OMPRTL__tgt_register_lib, 658 // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); 659 OMPRTL__tgt_unregister_lib, 660 // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, 661 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 662 OMPRTL__tgt_target_data_begin, 663 // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num, 664 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 665 OMPRTL__tgt_target_data_end, 666 // Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num, 667 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 668 OMPRTL__tgt_target_data_update, 669 }; 670 671 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 672 /// region. 673 class CleanupTy final : public EHScopeStack::Cleanup { 674 PrePostActionTy *Action; 675 676 public: 677 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 678 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 679 if (!CGF.HaveInsertPoint()) 680 return; 681 Action->Exit(CGF); 682 } 683 }; 684 685 } // anonymous namespace 686 687 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 688 CodeGenFunction::RunCleanupsScope Scope(CGF); 689 if (PrePostAction) { 690 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 691 Callback(CodeGen, CGF, *PrePostAction); 692 } else { 693 PrePostActionTy Action; 694 Callback(CodeGen, CGF, Action); 695 } 696 } 697 698 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 699 return CGF.EmitLoadOfPointerLValue( 700 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 701 getThreadIDVariable()->getType()->castAs<PointerType>()); 702 } 703 704 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 
705 if (!CGF.HaveInsertPoint()) 706 return; 707 // 1.2.2 OpenMP Language Terminology 708 // Structured block - An executable statement with a single entry at the 709 // top and a single exit at the bottom. 710 // The point of exit cannot be a branch out of the structured block. 711 // longjmp() and throw() must not violate the entry/exit criteria. 712 CGF.EHStack.pushTerminate(); 713 CodeGen(CGF); 714 CGF.EHStack.popTerminate(); 715 } 716 717 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 718 CodeGenFunction &CGF) { 719 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 720 getThreadIDVariable()->getType(), 721 AlignmentSource::Decl); 722 } 723 724 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) 725 : CGM(CGM), OffloadEntriesInfoManager(CGM) { 726 IdentTy = llvm::StructType::create( 727 "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, 728 CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, 729 CGM.Int8PtrTy /* psource */, nullptr); 730 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 731 732 loadOffloadInfoMetadata(); 733 } 734 735 void CGOpenMPRuntime::clear() { 736 InternalVars.clear(); 737 } 738 739 static llvm::Function * 740 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 741 const Expr *CombinerInitializer, const VarDecl *In, 742 const VarDecl *Out, bool IsCombiner) { 743 // void .omp_combiner.(Ty *in, Ty *out); 744 auto &C = CGM.getContext(); 745 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 746 FunctionArgList Args; 747 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 748 /*Id=*/nullptr, PtrTy); 749 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 750 /*Id=*/nullptr, PtrTy); 751 Args.push_back(&OmpOutParm); 752 Args.push_back(&OmpInParm); 753 auto &FnInfo = 754 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 755 auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 756 auto *Fn = 
llvm::Function::Create( 757 FnTy, llvm::GlobalValue::InternalLinkage, 758 IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule()); 759 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); 760 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 761 CodeGenFunction CGF(CGM); 762 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 763 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 764 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); 765 CodeGenFunction::OMPPrivateScope Scope(CGF); 766 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 767 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address { 768 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 769 .getAddress(); 770 }); 771 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 772 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address { 773 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 774 .getAddress(); 775 }); 776 (void)Scope.Privatize(); 777 CGF.EmitIgnoredExpr(CombinerInitializer); 778 Scope.ForceCleanup(); 779 CGF.FinishFunction(); 780 return Fn; 781 } 782 783 void CGOpenMPRuntime::emitUserDefinedReduction( 784 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 785 if (UDRMap.count(D) > 0) 786 return; 787 auto &C = CGM.getContext(); 788 if (!In || !Out) { 789 In = &C.Idents.get("omp_in"); 790 Out = &C.Idents.get("omp_out"); 791 } 792 llvm::Function *Combiner = emitCombinerOrInitializer( 793 CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()), 794 cast<VarDecl>(D->lookup(Out).front()), 795 /*IsCombiner=*/true); 796 llvm::Function *Initializer = nullptr; 797 if (auto *Init = D->getInitializer()) { 798 if (!Priv || !Orig) { 799 Priv = &C.Idents.get("omp_priv"); 800 Orig = &C.Idents.get("omp_orig"); 801 } 802 Initializer = emitCombinerOrInitializer( 803 CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()), 804 
cast<VarDecl>(D->lookup(Priv).front()), 805 /*IsCombiner=*/false); 806 } 807 UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer))); 808 if (CGF) { 809 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 810 Decls.second.push_back(D); 811 } 812 } 813 814 std::pair<llvm::Function *, llvm::Function *> 815 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 816 auto I = UDRMap.find(D); 817 if (I != UDRMap.end()) 818 return I->second; 819 emitUserDefinedReduction(/*CGF=*/nullptr, D); 820 return UDRMap.lookup(D); 821 } 822 823 // Layout information for ident_t. 824 static CharUnits getIdentAlign(CodeGenModule &CGM) { 825 return CGM.getPointerAlign(); 826 } 827 static CharUnits getIdentSize(CodeGenModule &CGM) { 828 assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign())); 829 return CharUnits::fromQuantity(16) + CGM.getPointerSize(); 830 } 831 static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) { 832 // All the fields except the last are i32, so this works beautifully. 
833 return unsigned(Field) * CharUnits::fromQuantity(4); 834 } 835 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr, 836 IdentFieldIndex Field, 837 const llvm::Twine &Name = "") { 838 auto Offset = getOffsetOfIdentField(Field); 839 return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name); 840 } 841 842 llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( 843 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 844 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 845 assert(ThreadIDVar->getType()->isPointerType() && 846 "thread id variable must be of type kmp_int32 *"); 847 const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 848 CodeGenFunction CGF(CGM, true); 849 bool HasCancel = false; 850 if (auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 851 HasCancel = OPD->hasCancel(); 852 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 853 HasCancel = OPSD->hasCancel(); 854 else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 855 HasCancel = OPFD->hasCancel(); 856 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 857 HasCancel); 858 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 859 return CGF.GenerateOpenMPCapturedStmtFunction(*CS); 860 } 861 862 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( 863 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 864 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 865 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 866 bool Tied, unsigned &NumberOfParts) { 867 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 868 PrePostActionTy &) { 869 auto *ThreadID = getThreadID(CGF, D.getLocStart()); 870 auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart()); 871 llvm::Value *TaskArgs[] = { 872 UpLoc, ThreadID, 873 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 874 TaskTVar->getType()->castAs<PointerType>()) 875 .getPointer()}; 876 
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 877 }; 878 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 879 UntiedCodeGen); 880 CodeGen.setAction(Action); 881 assert(!ThreadIDVar->getType()->isPointerType() && 882 "thread id variable must be of type kmp_int32 for tasks"); 883 auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 884 auto *TD = dyn_cast<OMPTaskDirective>(&D); 885 CodeGenFunction CGF(CGM, true); 886 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 887 InnermostKind, 888 TD ? TD->hasCancel() : false, Action); 889 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 890 auto *Res = CGF.GenerateCapturedStmtFunction(*CS); 891 if (!Tied) 892 NumberOfParts = Action.getNumberOfParts(); 893 return Res; 894 } 895 896 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 897 CharUnits Align = getIdentAlign(CGM); 898 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); 899 if (!Entry) { 900 if (!DefaultOpenMPPSource) { 901 // Initialize default location for psource field of ident_t structure of 902 // all ident_t objects. Format is ";file;function;line;column;;". 
903 // Taken from 904 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c 905 DefaultOpenMPPSource = 906 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 907 DefaultOpenMPPSource = 908 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 909 } 910 911 ConstantInitBuilder builder(CGM); 912 auto fields = builder.beginStruct(IdentTy); 913 fields.addInt(CGM.Int32Ty, 0); 914 fields.addInt(CGM.Int32Ty, Flags); 915 fields.addInt(CGM.Int32Ty, 0); 916 fields.addInt(CGM.Int32Ty, 0); 917 fields.add(DefaultOpenMPPSource); 918 auto DefaultOpenMPLocation = 919 fields.finishAndCreateGlobal("", Align, /*isConstant*/ true, 920 llvm::GlobalValue::PrivateLinkage); 921 DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 922 923 OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation; 924 } 925 return Address(Entry, Align); 926 } 927 928 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 929 SourceLocation Loc, 930 unsigned Flags) { 931 Flags |= OMP_IDENT_KMPC; 932 // If no debug info is generated - return global default location. 933 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 934 Loc.isInvalid()) 935 return getOrCreateDefaultLocation(Flags).getPointer(); 936 937 assert(CGF.CurFn && "No function in current CodeGenFunction."); 938 939 Address LocValue = Address::invalid(); 940 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 941 if (I != OpenMPLocThreadIDMap.end()) 942 LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM)); 943 944 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 945 // GetOpenMPThreadID was called before this routine. 
946 if (!LocValue.isValid()) { 947 // Generate "ident_t .kmpc_loc.addr;" 948 Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM), 949 ".kmpc_loc.addr"); 950 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 951 Elem.second.DebugLoc = AI.getPointer(); 952 LocValue = AI; 953 954 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 955 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 956 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 957 CGM.getSize(getIdentSize(CGF.CGM))); 958 } 959 960 // char **psource = &.kmpc_loc_<flags>.addr.psource; 961 Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource); 962 963 auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 964 if (OMPDebugLoc == nullptr) { 965 SmallString<128> Buffer2; 966 llvm::raw_svector_ostream OS2(Buffer2); 967 // Build debug location 968 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 969 OS2 << ";" << PLoc.getFilename() << ";"; 970 if (const FunctionDecl *FD = 971 dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) { 972 OS2 << FD->getQualifiedNameAsString(); 973 } 974 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 975 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 976 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 977 } 978 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 979 CGF.Builder.CreateStore(OMPDebugLoc, PSource); 980 981 // Our callers always pass this to a runtime function, so for 982 // convenience, go ahead and return a naked pointer. 983 return LocValue.getPointer(); 984 } 985 986 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 987 SourceLocation Loc) { 988 assert(CGF.CurFn && "No function in current CodeGenFunction."); 989 990 llvm::Value *ThreadID = nullptr; 991 // Check whether we've already cached a load of the thread id in this 992 // function. 
993 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 994 if (I != OpenMPLocThreadIDMap.end()) { 995 ThreadID = I->second.ThreadID; 996 if (ThreadID != nullptr) 997 return ThreadID; 998 } 999 if (auto *OMPRegionInfo = 1000 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1001 if (OMPRegionInfo->getThreadIDVariable()) { 1002 // Check if this an outlined function with thread id passed as argument. 1003 auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1004 ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); 1005 // If value loaded in entry block, cache it and use it everywhere in 1006 // function. 1007 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 1008 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1009 Elem.second.ThreadID = ThreadID; 1010 } 1011 return ThreadID; 1012 } 1013 } 1014 1015 // This is not an outlined function region - need to call __kmpc_int32 1016 // kmpc_global_thread_num(ident_t *loc). 1017 // Generate thread id value and cache this value for use across the 1018 // function. 
1019 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1020 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 1021 ThreadID = 1022 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1023 emitUpdateLocation(CGF, Loc)); 1024 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1025 Elem.second.ThreadID = ThreadID; 1026 return ThreadID; 1027 } 1028 1029 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1030 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1031 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 1032 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1033 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1034 for(auto *D : FunctionUDRMap[CGF.CurFn]) { 1035 UDRMap.erase(D); 1036 } 1037 FunctionUDRMap.erase(CGF.CurFn); 1038 } 1039 } 1040 1041 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1042 if (!IdentTy) { 1043 } 1044 return llvm::PointerType::getUnqual(IdentTy); 1045 } 1046 1047 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1048 if (!Kmpc_MicroTy) { 1049 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1050 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1051 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1052 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1053 } 1054 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1055 } 1056 1057 llvm::Constant * 1058 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1059 llvm::Constant *RTLFn = nullptr; 1060 switch (static_cast<OpenMPRTLFunction>(Function)) { 1061 case OMPRTL__kmpc_fork_call: { 1062 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1063 // microtask, ...); 1064 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1065 getKmpc_MicroPointerTy()}; 1066 llvm::FunctionType *FnTy = 1067 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1068 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1069 break; 1070 } 1071 case OMPRTL__kmpc_global_thread_num: { 1072 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1073 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1074 llvm::FunctionType *FnTy = 1075 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1076 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1077 break; 1078 } 1079 case OMPRTL__kmpc_threadprivate_cached: { 1080 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1081 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1082 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1083 CGM.VoidPtrTy, CGM.SizeTy, 1084 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1085 llvm::FunctionType *FnTy = 1086 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1087 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1088 break; 1089 } 1090 case OMPRTL__kmpc_critical: { 1091 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1092 // kmp_critical_name *crit); 1093 llvm::Type *TypeParams[] = { 1094 getIdentTyPointerTy(), 
CGM.Int32Ty, 1095 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1096 llvm::FunctionType *FnTy = 1097 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1098 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1099 break; 1100 } 1101 case OMPRTL__kmpc_critical_with_hint: { 1102 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1103 // kmp_critical_name *crit, uintptr_t hint); 1104 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1105 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1106 CGM.IntPtrTy}; 1107 llvm::FunctionType *FnTy = 1108 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1109 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); 1110 break; 1111 } 1112 case OMPRTL__kmpc_threadprivate_register: { 1113 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1114 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1115 // typedef void *(*kmpc_ctor)(void *); 1116 auto KmpcCtorTy = 1117 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1118 /*isVarArg*/ false)->getPointerTo(); 1119 // typedef void *(*kmpc_cctor)(void *, void *); 1120 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1121 auto KmpcCopyCtorTy = 1122 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1123 /*isVarArg*/ false)->getPointerTo(); 1124 // typedef void (*kmpc_dtor)(void *); 1125 auto KmpcDtorTy = 1126 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1127 ->getPointerTo(); 1128 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1129 KmpcCopyCtorTy, KmpcDtorTy}; 1130 auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1131 /*isVarArg*/ false); 1132 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1133 break; 1134 } 1135 case OMPRTL__kmpc_end_critical: { 1136 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1137 // kmp_critical_name *crit); 
1138 llvm::Type *TypeParams[] = { 1139 getIdentTyPointerTy(), CGM.Int32Ty, 1140 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1141 llvm::FunctionType *FnTy = 1142 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1143 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1144 break; 1145 } 1146 case OMPRTL__kmpc_cancel_barrier: { 1147 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1148 // global_tid); 1149 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1150 llvm::FunctionType *FnTy = 1151 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1152 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1153 break; 1154 } 1155 case OMPRTL__kmpc_barrier: { 1156 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1157 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1158 llvm::FunctionType *FnTy = 1159 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1160 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1161 break; 1162 } 1163 case OMPRTL__kmpc_for_static_fini: { 1164 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1165 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1166 llvm::FunctionType *FnTy = 1167 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1168 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1169 break; 1170 } 1171 case OMPRTL__kmpc_push_num_threads: { 1172 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1173 // kmp_int32 num_threads) 1174 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1175 CGM.Int32Ty}; 1176 llvm::FunctionType *FnTy = 1177 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1178 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1179 break; 1180 } 1181 case OMPRTL__kmpc_serialized_parallel: { 1182 // Build void 
__kmpc_serialized_parallel(ident_t *loc, kmp_int32 1183 // global_tid); 1184 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1185 llvm::FunctionType *FnTy = 1186 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1187 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1188 break; 1189 } 1190 case OMPRTL__kmpc_end_serialized_parallel: { 1191 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1192 // global_tid); 1193 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1194 llvm::FunctionType *FnTy = 1195 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1196 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1197 break; 1198 } 1199 case OMPRTL__kmpc_flush: { 1200 // Build void __kmpc_flush(ident_t *loc); 1201 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1202 llvm::FunctionType *FnTy = 1203 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1204 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1205 break; 1206 } 1207 case OMPRTL__kmpc_master: { 1208 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1209 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1210 llvm::FunctionType *FnTy = 1211 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1212 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1213 break; 1214 } 1215 case OMPRTL__kmpc_end_master: { 1216 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1217 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1218 llvm::FunctionType *FnTy = 1219 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1220 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1221 break; 1222 } 1223 case OMPRTL__kmpc_omp_taskyield: { 1224 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1225 // int end_part); 1226 
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1227 llvm::FunctionType *FnTy = 1228 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1229 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1230 break; 1231 } 1232 case OMPRTL__kmpc_single: { 1233 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1234 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1235 llvm::FunctionType *FnTy = 1236 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1237 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1238 break; 1239 } 1240 case OMPRTL__kmpc_end_single: { 1241 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1242 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1243 llvm::FunctionType *FnTy = 1244 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1245 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1246 break; 1247 } 1248 case OMPRTL__kmpc_omp_task_alloc: { 1249 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1250 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1251 // kmp_routine_entry_t *task_entry); 1252 assert(KmpRoutineEntryPtrTy != nullptr && 1253 "Type kmp_routine_entry_t must be created."); 1254 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1255 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1256 // Return void * and then cast to particular kmp_task_t type. 
1257 llvm::FunctionType *FnTy = 1258 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1259 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1260 break; 1261 } 1262 case OMPRTL__kmpc_omp_task: { 1263 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1264 // *new_task); 1265 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1266 CGM.VoidPtrTy}; 1267 llvm::FunctionType *FnTy = 1268 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1269 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 1270 break; 1271 } 1272 case OMPRTL__kmpc_copyprivate: { 1273 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 1274 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 1275 // kmp_int32 didit); 1276 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1277 auto *CpyFnTy = 1278 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 1279 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 1280 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 1281 CGM.Int32Ty}; 1282 llvm::FunctionType *FnTy = 1283 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1284 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 1285 break; 1286 } 1287 case OMPRTL__kmpc_reduce: { 1288 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 1289 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 1290 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 1291 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1292 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1293 /*isVarArg=*/false); 1294 llvm::Type *TypeParams[] = { 1295 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1296 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1297 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1298 
llvm::FunctionType *FnTy = 1299 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1300 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 1301 break; 1302 } 1303 case OMPRTL__kmpc_reduce_nowait: { 1304 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 1305 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 1306 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 1307 // *lck); 1308 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1309 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1310 /*isVarArg=*/false); 1311 llvm::Type *TypeParams[] = { 1312 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1313 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1314 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1315 llvm::FunctionType *FnTy = 1316 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1317 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 1318 break; 1319 } 1320 case OMPRTL__kmpc_end_reduce: { 1321 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 1322 // kmp_critical_name *lck); 1323 llvm::Type *TypeParams[] = { 1324 getIdentTyPointerTy(), CGM.Int32Ty, 1325 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1326 llvm::FunctionType *FnTy = 1327 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1328 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 1329 break; 1330 } 1331 case OMPRTL__kmpc_end_reduce_nowait: { 1332 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 1333 // kmp_critical_name *lck); 1334 llvm::Type *TypeParams[] = { 1335 getIdentTyPointerTy(), CGM.Int32Ty, 1336 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1337 llvm::FunctionType *FnTy = 1338 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1339 RTLFn = 1340 CGM.CreateRuntimeFunction(FnTy, 
/*Name=*/"__kmpc_end_reduce_nowait"); 1341 break; 1342 } 1343 case OMPRTL__kmpc_omp_task_begin_if0: { 1344 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1345 // *new_task); 1346 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1347 CGM.VoidPtrTy}; 1348 llvm::FunctionType *FnTy = 1349 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1350 RTLFn = 1351 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 1352 break; 1353 } 1354 case OMPRTL__kmpc_omp_task_complete_if0: { 1355 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1356 // *new_task); 1357 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1358 CGM.VoidPtrTy}; 1359 llvm::FunctionType *FnTy = 1360 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1361 RTLFn = CGM.CreateRuntimeFunction(FnTy, 1362 /*Name=*/"__kmpc_omp_task_complete_if0"); 1363 break; 1364 } 1365 case OMPRTL__kmpc_ordered: { 1366 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 1367 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1368 llvm::FunctionType *FnTy = 1369 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1370 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 1371 break; 1372 } 1373 case OMPRTL__kmpc_end_ordered: { 1374 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 1375 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1376 llvm::FunctionType *FnTy = 1377 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1378 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 1379 break; 1380 } 1381 case OMPRTL__kmpc_omp_taskwait: { 1382 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 1383 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1384 llvm::FunctionType *FnTy = 1385 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1386 RTLFn = 
CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 1387 break; 1388 } 1389 case OMPRTL__kmpc_taskgroup: { 1390 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 1391 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1392 llvm::FunctionType *FnTy = 1393 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1394 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 1395 break; 1396 } 1397 case OMPRTL__kmpc_end_taskgroup: { 1398 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 1399 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1400 llvm::FunctionType *FnTy = 1401 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1402 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 1403 break; 1404 } 1405 case OMPRTL__kmpc_push_proc_bind: { 1406 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 1407 // int proc_bind) 1408 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1409 llvm::FunctionType *FnTy = 1410 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1411 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 1412 break; 1413 } 1414 case OMPRTL__kmpc_omp_task_with_deps: { 1415 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 1416 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 1417 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 1418 llvm::Type *TypeParams[] = { 1419 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 1420 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 1421 llvm::FunctionType *FnTy = 1422 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1423 RTLFn = 1424 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 1425 break; 1426 } 1427 case OMPRTL__kmpc_omp_wait_deps: { 1428 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 1429 // 
kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 1430 // kmp_depend_info_t *noalias_dep_list); 1431 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1432 CGM.Int32Ty, CGM.VoidPtrTy, 1433 CGM.Int32Ty, CGM.VoidPtrTy}; 1434 llvm::FunctionType *FnTy = 1435 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1436 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 1437 break; 1438 } 1439 case OMPRTL__kmpc_cancellationpoint: { 1440 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 1441 // global_tid, kmp_int32 cncl_kind) 1442 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1443 llvm::FunctionType *FnTy = 1444 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1445 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 1446 break; 1447 } 1448 case OMPRTL__kmpc_cancel: { 1449 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 1450 // kmp_int32 cncl_kind) 1451 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1452 llvm::FunctionType *FnTy = 1453 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1454 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 1455 break; 1456 } 1457 case OMPRTL__kmpc_push_num_teams: { 1458 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 1459 // kmp_int32 num_teams, kmp_int32 num_threads) 1460 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1461 CGM.Int32Ty}; 1462 llvm::FunctionType *FnTy = 1463 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1464 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 1465 break; 1466 } 1467 case OMPRTL__kmpc_fork_teams: { 1468 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 1469 // microtask, ...); 1470 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1471 
getKmpc_MicroPointerTy()}; 1472 llvm::FunctionType *FnTy = 1473 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1474 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 1475 break; 1476 } 1477 case OMPRTL__kmpc_taskloop: { 1478 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 1479 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 1480 // sched, kmp_uint64 grainsize, void *task_dup); 1481 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 1482 CGM.IntTy, 1483 CGM.VoidPtrTy, 1484 CGM.IntTy, 1485 CGM.Int64Ty->getPointerTo(), 1486 CGM.Int64Ty->getPointerTo(), 1487 CGM.Int64Ty, 1488 CGM.IntTy, 1489 CGM.IntTy, 1490 CGM.Int64Ty, 1491 CGM.VoidPtrTy}; 1492 llvm::FunctionType *FnTy = 1493 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1494 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 1495 break; 1496 } 1497 case OMPRTL__kmpc_doacross_init: { 1498 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 1499 // num_dims, struct kmp_dim *dims); 1500 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 1501 CGM.Int32Ty, 1502 CGM.Int32Ty, 1503 CGM.VoidPtrTy}; 1504 llvm::FunctionType *FnTy = 1505 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1506 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 1507 break; 1508 } 1509 case OMPRTL__kmpc_doacross_fini: { 1510 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 1511 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1512 llvm::FunctionType *FnTy = 1513 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1514 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 1515 break; 1516 } 1517 case OMPRTL__kmpc_doacross_post: { 1518 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 1519 // *vec); 1520 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 
1521 CGM.Int64Ty->getPointerTo()}; 1522 llvm::FunctionType *FnTy = 1523 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1524 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 1525 break; 1526 } 1527 case OMPRTL__kmpc_doacross_wait: { 1528 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 1529 // *vec); 1530 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1531 CGM.Int64Ty->getPointerTo()}; 1532 llvm::FunctionType *FnTy = 1533 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1534 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 1535 break; 1536 } 1537 case OMPRTL__tgt_target: { 1538 // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t 1539 // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t 1540 // *arg_types); 1541 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1542 CGM.VoidPtrTy, 1543 CGM.Int32Ty, 1544 CGM.VoidPtrPtrTy, 1545 CGM.VoidPtrPtrTy, 1546 CGM.SizeTy->getPointerTo(), 1547 CGM.Int32Ty->getPointerTo()}; 1548 llvm::FunctionType *FnTy = 1549 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1550 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 1551 break; 1552 } 1553 case OMPRTL__tgt_target_teams: { 1554 // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, 1555 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 1556 // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); 1557 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1558 CGM.VoidPtrTy, 1559 CGM.Int32Ty, 1560 CGM.VoidPtrPtrTy, 1561 CGM.VoidPtrPtrTy, 1562 CGM.SizeTy->getPointerTo(), 1563 CGM.Int32Ty->getPointerTo(), 1564 CGM.Int32Ty, 1565 CGM.Int32Ty}; 1566 llvm::FunctionType *FnTy = 1567 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1568 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 1569 break; 1570 } 1571 case OMPRTL__tgt_register_lib: { 
1572 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 1573 QualType ParamTy = 1574 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 1575 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 1576 llvm::FunctionType *FnTy = 1577 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1578 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 1579 break; 1580 } 1581 case OMPRTL__tgt_unregister_lib: { 1582 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 1583 QualType ParamTy = 1584 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 1585 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 1586 llvm::FunctionType *FnTy = 1587 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1588 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 1589 break; 1590 } 1591 case OMPRTL__tgt_target_data_begin: { 1592 // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, 1593 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 1594 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1595 CGM.Int32Ty, 1596 CGM.VoidPtrPtrTy, 1597 CGM.VoidPtrPtrTy, 1598 CGM.SizeTy->getPointerTo(), 1599 CGM.Int32Ty->getPointerTo()}; 1600 llvm::FunctionType *FnTy = 1601 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1602 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 1603 break; 1604 } 1605 case OMPRTL__tgt_target_data_end: { 1606 // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num, 1607 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 1608 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1609 CGM.Int32Ty, 1610 CGM.VoidPtrPtrTy, 1611 CGM.VoidPtrPtrTy, 1612 CGM.SizeTy->getPointerTo(), 1613 CGM.Int32Ty->getPointerTo()}; 1614 llvm::FunctionType *FnTy = 1615 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1616 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__tgt_target_data_end"); 1617 break; 1618 } 1619 case OMPRTL__tgt_target_data_update: { 1620 // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num, 1621 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 1622 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1623 CGM.Int32Ty, 1624 CGM.VoidPtrPtrTy, 1625 CGM.VoidPtrPtrTy, 1626 CGM.SizeTy->getPointerTo(), 1627 CGM.Int32Ty->getPointerTo()}; 1628 llvm::FunctionType *FnTy = 1629 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1630 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 1631 break; 1632 } 1633 } 1634 assert(RTLFn && "Unable to find OpenMP runtime function"); 1635 return RTLFn; 1636 } 1637 1638 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, 1639 bool IVSigned) { 1640 assert((IVSize == 32 || IVSize == 64) && 1641 "IV size is not compatible with the omp runtime"); 1642 auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1643 : "__kmpc_for_static_init_4u") 1644 : (IVSigned ? "__kmpc_for_static_init_8" 1645 : "__kmpc_for_static_init_8u"); 1646 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1647 auto PtrTy = llvm::PointerType::getUnqual(ITy); 1648 llvm::Type *TypeParams[] = { 1649 getIdentTyPointerTy(), // loc 1650 CGM.Int32Ty, // tid 1651 CGM.Int32Ty, // schedtype 1652 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1653 PtrTy, // p_lower 1654 PtrTy, // p_upper 1655 PtrTy, // p_stride 1656 ITy, // incr 1657 ITy // chunk 1658 }; 1659 llvm::FunctionType *FnTy = 1660 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1661 return CGM.CreateRuntimeFunction(FnTy, Name); 1662 } 1663 1664 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, 1665 bool IVSigned) { 1666 assert((IVSize == 32 || IVSize == 64) && 1667 "IV size is not compatible with the omp runtime"); 1668 auto Name = 1669 IVSize == 32 1670 ? (IVSigned ? 
"__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1671 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1672 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1673 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1674 CGM.Int32Ty, // tid 1675 CGM.Int32Ty, // schedtype 1676 ITy, // lower 1677 ITy, // upper 1678 ITy, // stride 1679 ITy // chunk 1680 }; 1681 llvm::FunctionType *FnTy = 1682 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1683 return CGM.CreateRuntimeFunction(FnTy, Name); 1684 } 1685 1686 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, 1687 bool IVSigned) { 1688 assert((IVSize == 32 || IVSize == 64) && 1689 "IV size is not compatible with the omp runtime"); 1690 auto Name = 1691 IVSize == 32 1692 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1693 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1694 llvm::Type *TypeParams[] = { 1695 getIdentTyPointerTy(), // loc 1696 CGM.Int32Ty, // tid 1697 }; 1698 llvm::FunctionType *FnTy = 1699 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1700 return CGM.CreateRuntimeFunction(FnTy, Name); 1701 } 1702 1703 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, 1704 bool IVSigned) { 1705 assert((IVSize == 32 || IVSize == 64) && 1706 "IV size is not compatible with the omp runtime"); 1707 auto Name = 1708 IVSize == 32 1709 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1710 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1711 auto ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1712 auto PtrTy = llvm::PointerType::getUnqual(ITy); 1713 llvm::Type *TypeParams[] = { 1714 getIdentTyPointerTy(), // loc 1715 CGM.Int32Ty, // tid 1716 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1717 PtrTy, // p_lower 1718 PtrTy, // p_upper 1719 PtrTy // p_stride 1720 }; 1721 llvm::FunctionType *FnTy = 1722 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1723 return CGM.CreateRuntimeFunction(FnTy, Name); 1724 } 1725 1726 llvm::Constant * 1727 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1728 assert(!CGM.getLangOpts().OpenMPUseTLS || 1729 !CGM.getContext().getTargetInfo().isTLSSupported()); 1730 // Lookup the entry, lazily creating it if necessary. 1731 return getOrCreateInternalVariable(CGM.Int8PtrPtrTy, 1732 Twine(CGM.getMangledName(VD)) + ".cache."); 1733 } 1734 1735 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1736 const VarDecl *VD, 1737 Address VDAddr, 1738 SourceLocation Loc) { 1739 if (CGM.getLangOpts().OpenMPUseTLS && 1740 CGM.getContext().getTargetInfo().isTLSSupported()) 1741 return VDAddr; 1742 1743 auto VarTy = VDAddr.getElementType(); 1744 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1745 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1746 CGM.Int8PtrTy), 1747 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1748 getOrCreateThreadPrivateCache(VD)}; 1749 return Address(CGF.EmitRuntimeCall( 1750 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 1751 VDAddr.getAlignment()); 1752 } 1753 1754 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1755 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1756 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1757 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1758 // library. 
1759 auto OMPLoc = emitUpdateLocation(CGF, Loc); 1760 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1761 OMPLoc); 1762 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1763 // to register constructor/destructor for variable. 1764 llvm::Value *Args[] = {OMPLoc, 1765 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1766 CGM.VoidPtrTy), 1767 Ctor, CopyCtor, Dtor}; 1768 CGF.EmitRuntimeCall( 1769 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 1770 } 1771 1772 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1773 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1774 bool PerformInit, CodeGenFunction *CGF) { 1775 if (CGM.getLangOpts().OpenMPUseTLS && 1776 CGM.getContext().getTargetInfo().isTLSSupported()) 1777 return nullptr; 1778 1779 VD = VD->getDefinition(CGM.getContext()); 1780 if (VD && ThreadPrivateWithDefinition.count(VD) == 0) { 1781 ThreadPrivateWithDefinition.insert(VD); 1782 QualType ASTTy = VD->getType(); 1783 1784 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1785 auto Init = VD->getAnyInitializer(); 1786 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1787 // Generate function that re-emits the declaration's initializer into the 1788 // threadprivate copy of the variable VD 1789 CodeGenFunction CtorCGF(CGM); 1790 FunctionArgList Args; 1791 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 1792 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 1793 Args.push_back(&Dst); 1794 1795 auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1796 CGM.getContext().VoidPtrTy, Args); 1797 auto FTy = CGM.getTypes().GetFunctionType(FI); 1798 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 1799 FTy, ".__kmpc_global_ctor_.", FI, Loc); 1800 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1801 Args, SourceLocation()); 1802 auto ArgVal = CtorCGF.EmitLoadOfScalar( 1803 CtorCGF.GetAddrOfLocalVar(&Dst), 
/*Volatile=*/false, 1804 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1805 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1806 Arg = CtorCGF.Builder.CreateElementBitCast(Arg, 1807 CtorCGF.ConvertTypeForMem(ASTTy)); 1808 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1809 /*IsInitializer=*/true); 1810 ArgVal = CtorCGF.EmitLoadOfScalar( 1811 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1812 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1813 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1814 CtorCGF.FinishFunction(); 1815 Ctor = Fn; 1816 } 1817 if (VD->getType().isDestructedType() != QualType::DK_none) { 1818 // Generate function that emits destructor call for the threadprivate copy 1819 // of the variable VD 1820 CodeGenFunction DtorCGF(CGM); 1821 FunctionArgList Args; 1822 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 1823 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 1824 Args.push_back(&Dst); 1825 1826 auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1827 CGM.getContext().VoidTy, Args); 1828 auto FTy = CGM.getTypes().GetFunctionType(FI); 1829 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 1830 FTy, ".__kmpc_global_dtor_.", FI, Loc); 1831 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1832 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1833 SourceLocation()); 1834 // Create a scope with an artificial location for the body of this function. 
1835 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1836 auto ArgVal = DtorCGF.EmitLoadOfScalar( 1837 DtorCGF.GetAddrOfLocalVar(&Dst), 1838 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1839 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 1840 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1841 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1842 DtorCGF.FinishFunction(); 1843 Dtor = Fn; 1844 } 1845 // Do not emit init function if it is not required. 1846 if (!Ctor && !Dtor) 1847 return nullptr; 1848 1849 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1850 auto CopyCtorTy = 1851 llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1852 /*isVarArg=*/false)->getPointerTo(); 1853 // Copying constructor for the threadprivate variable. 1854 // Must be NULL - reserved by runtime, but currently it requires that this 1855 // parameter is always NULL. Otherwise it fires assertion. 1856 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1857 if (Ctor == nullptr) { 1858 auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1859 /*isVarArg=*/false)->getPointerTo(); 1860 Ctor = llvm::Constant::getNullValue(CtorTy); 1861 } 1862 if (Dtor == nullptr) { 1863 auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1864 /*isVarArg=*/false)->getPointerTo(); 1865 Dtor = llvm::Constant::getNullValue(DtorTy); 1866 } 1867 if (!CGF) { 1868 auto InitFunctionTy = 1869 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1870 auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( 1871 InitFunctionTy, ".__omp_threadprivate_init_.", 1872 CGM.getTypes().arrangeNullaryFunction()); 1873 CodeGenFunction InitCGF(CGM); 1874 FunctionArgList ArgList; 1875 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1876 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1877 Loc); 1878 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1879 InitCGF.FinishFunction(); 
1880 return InitFunction; 1881 } 1882 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1883 } 1884 return nullptr; 1885 } 1886 1887 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen 1888 /// function. Here is the logic: 1889 /// if (Cond) { 1890 /// ThenGen(); 1891 /// } else { 1892 /// ElseGen(); 1893 /// } 1894 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 1895 const RegionCodeGenTy &ThenGen, 1896 const RegionCodeGenTy &ElseGen) { 1897 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 1898 1899 // If the condition constant folds and can be elided, try to avoid emitting 1900 // the condition and the dead arm of the if/else. 1901 bool CondConstant; 1902 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 1903 if (CondConstant) 1904 ThenGen(CGF); 1905 else 1906 ElseGen(CGF); 1907 return; 1908 } 1909 1910 // Otherwise, the condition did not fold, or we couldn't elide it. Just 1911 // emit the conditional branch. 1912 auto ThenBlock = CGF.createBasicBlock("omp_if.then"); 1913 auto ElseBlock = CGF.createBasicBlock("omp_if.else"); 1914 auto ContBlock = CGF.createBasicBlock("omp_if.end"); 1915 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 1916 1917 // Emit the 'then' code. 1918 CGF.EmitBlock(ThenBlock); 1919 ThenGen(CGF); 1920 CGF.EmitBranch(ContBlock); 1921 // Emit the 'else' code if present. 1922 // There is no need to emit line number for unconditional branch. 1923 (void)ApplyDebugLocation::CreateEmpty(CGF); 1924 CGF.EmitBlock(ElseBlock); 1925 ElseGen(CGF); 1926 // There is no need to emit line number for unconditional branch. 1927 (void)ApplyDebugLocation::CreateEmpty(CGF); 1928 CGF.EmitBranch(ContBlock); 1929 // Emit the continuation block for code after the if. 
1930 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 1931 } 1932 1933 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 1934 llvm::Value *OutlinedFn, 1935 ArrayRef<llvm::Value *> CapturedVars, 1936 const Expr *IfCond) { 1937 if (!CGF.HaveInsertPoint()) 1938 return; 1939 auto *RTLoc = emitUpdateLocation(CGF, Loc); 1940 auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, 1941 PrePostActionTy &) { 1942 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 1943 auto &RT = CGF.CGM.getOpenMPRuntime(); 1944 llvm::Value *Args[] = { 1945 RTLoc, 1946 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 1947 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 1948 llvm::SmallVector<llvm::Value *, 16> RealArgs; 1949 RealArgs.append(std::begin(Args), std::end(Args)); 1950 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 1951 1952 auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); 1953 CGF.EmitRuntimeCall(RTLFn, RealArgs); 1954 }; 1955 auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, 1956 PrePostActionTy &) { 1957 auto &RT = CGF.CGM.getOpenMPRuntime(); 1958 auto ThreadID = RT.getThreadID(CGF, Loc); 1959 // Build calls: 1960 // __kmpc_serialized_parallel(&Loc, GTid); 1961 llvm::Value *Args[] = {RTLoc, ThreadID}; 1962 CGF.EmitRuntimeCall( 1963 RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); 1964 1965 // OutlinedFn(>id, &zero, CapturedStruct); 1966 auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 1967 Address ZeroAddr = 1968 CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), 1969 /*Name*/ ".zero.addr"); 1970 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 1971 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 1972 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 1973 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 1974 OutlinedFnArgs.append(CapturedVars.begin(), 
CapturedVars.end()); 1975 CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); 1976 1977 // __kmpc_end_serialized_parallel(&Loc, GTid); 1978 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 1979 CGF.EmitRuntimeCall( 1980 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 1981 EndArgs); 1982 }; 1983 if (IfCond) 1984 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 1985 else { 1986 RegionCodeGenTy ThenRCG(ThenGen); 1987 ThenRCG(CGF); 1988 } 1989 } 1990 1991 // If we're inside an (outlined) parallel region, use the region info's 1992 // thread-ID variable (it is passed in a first argument of the outlined function 1993 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 1994 // regular serial code region, get thread ID by calling kmp_int32 1995 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 1996 // return the address of that temp. 1997 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 1998 SourceLocation Loc) { 1999 if (auto *OMPRegionInfo = 2000 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2001 if (OMPRegionInfo->getThreadIDVariable()) 2002 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 2003 2004 auto ThreadID = getThreadID(CGF, Loc); 2005 auto Int32Ty = 2006 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2007 auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2008 CGF.EmitStoreOfScalar(ThreadID, 2009 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2010 2011 return ThreadIDTemp; 2012 } 2013 2014 llvm::Constant * 2015 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, 2016 const llvm::Twine &Name) { 2017 SmallString<256> Buffer; 2018 llvm::raw_svector_ostream Out(Buffer); 2019 Out << Name; 2020 auto RuntimeName = Out.str(); 2021 auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first; 2022 if (Elem.second) { 2023 
assert(Elem.second->getType()->getPointerElementType() == Ty && 2024 "OMP internal variable has different type than requested"); 2025 return &*Elem.second; 2026 } 2027 2028 return Elem.second = new llvm::GlobalVariable( 2029 CGM.getModule(), Ty, /*IsConstant*/ false, 2030 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2031 Elem.first()); 2032 } 2033 2034 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2035 llvm::Twine Name(".gomp_critical_user_", CriticalName); 2036 return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); 2037 } 2038 2039 namespace { 2040 /// Common pre(post)-action for different OpenMP constructs. 2041 class CommonActionTy final : public PrePostActionTy { 2042 llvm::Value *EnterCallee; 2043 ArrayRef<llvm::Value *> EnterArgs; 2044 llvm::Value *ExitCallee; 2045 ArrayRef<llvm::Value *> ExitArgs; 2046 bool Conditional; 2047 llvm::BasicBlock *ContBlock = nullptr; 2048 2049 public: 2050 CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs, 2051 llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs, 2052 bool Conditional = false) 2053 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2054 ExitArgs(ExitArgs), Conditional(Conditional) {} 2055 void Enter(CodeGenFunction &CGF) override { 2056 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2057 if (Conditional) { 2058 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2059 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2060 ContBlock = CGF.createBasicBlock("omp_if.end"); 2061 // Generate the branch (If-stmt) 2062 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2063 CGF.EmitBlock(ThenBlock); 2064 } 2065 } 2066 void Done(CodeGenFunction &CGF) { 2067 // Emit the rest of blocks/branches 2068 CGF.EmitBranch(ContBlock); 2069 CGF.EmitBlock(ContBlock, true); 2070 } 2071 void Exit(CodeGenFunction &CGF) override { 2072 CGF.EmitRuntimeCall(ExitCallee, 
ExitArgs); 2073 } 2074 }; 2075 } // anonymous namespace 2076 2077 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2078 StringRef CriticalName, 2079 const RegionCodeGenTy &CriticalOpGen, 2080 SourceLocation Loc, const Expr *Hint) { 2081 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2082 // CriticalOpGen(); 2083 // __kmpc_end_critical(ident_t *, gtid, Lock); 2084 // Prepare arguments and build a call to __kmpc_critical 2085 if (!CGF.HaveInsertPoint()) 2086 return; 2087 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2088 getCriticalRegionLock(CriticalName)}; 2089 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2090 std::end(Args)); 2091 if (Hint) { 2092 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2093 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); 2094 } 2095 CommonActionTy Action( 2096 createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint 2097 : OMPRTL__kmpc_critical), 2098 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); 2099 CriticalOpGen.setAction(Action); 2100 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2101 } 2102 2103 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2104 const RegionCodeGenTy &MasterOpGen, 2105 SourceLocation Loc) { 2106 if (!CGF.HaveInsertPoint()) 2107 return; 2108 // if(__kmpc_master(ident_t *, gtid)) { 2109 // MasterOpGen(); 2110 // __kmpc_end_master(ident_t *, gtid); 2111 // } 2112 // Prepare arguments and build a call to __kmpc_master 2113 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2114 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, 2115 createRuntimeFunction(OMPRTL__kmpc_end_master), Args, 2116 /*Conditional=*/true); 2117 MasterOpGen.setAction(Action); 2118 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2119 Action.Done(CGF); 2120 } 2121 2122 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2123 SourceLocation Loc) { 
2124 if (!CGF.HaveInsertPoint()) 2125 return; 2126 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2127 llvm::Value *Args[] = { 2128 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2129 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2130 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 2131 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2132 Region->emitUntiedSwitch(CGF); 2133 } 2134 2135 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2136 const RegionCodeGenTy &TaskgroupOpGen, 2137 SourceLocation Loc) { 2138 if (!CGF.HaveInsertPoint()) 2139 return; 2140 // __kmpc_taskgroup(ident_t *, gtid); 2141 // TaskgroupOpGen(); 2142 // __kmpc_end_taskgroup(ident_t *, gtid); 2143 // Prepare arguments and build a call to __kmpc_taskgroup 2144 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2145 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, 2146 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 2147 Args); 2148 TaskgroupOpGen.setAction(Action); 2149 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2150 } 2151 2152 /// Given an array of pointers to variables, project the address of a 2153 /// given variable. 2154 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2155 unsigned Index, const VarDecl *Var) { 2156 // Pull out the pointer to the variable. 
2157 Address PtrAddr = 2158 CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize()); 2159 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2160 2161 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 2162 Addr = CGF.Builder.CreateElementBitCast( 2163 Addr, CGF.ConvertTypeForMem(Var->getType())); 2164 return Addr; 2165 } 2166 2167 static llvm::Value *emitCopyprivateCopyFunction( 2168 CodeGenModule &CGM, llvm::Type *ArgsType, 2169 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2170 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) { 2171 auto &C = CGM.getContext(); 2172 // void copy_func(void *LHSArg, void *RHSArg); 2173 FunctionArgList Args; 2174 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 2175 C.VoidPtrTy); 2176 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 2177 C.VoidPtrTy); 2178 Args.push_back(&LHSArg); 2179 Args.push_back(&RHSArg); 2180 auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2181 auto *Fn = llvm::Function::Create( 2182 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 2183 ".omp.copyprivate.copy_func", &CGM.getModule()); 2184 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); 2185 CodeGenFunction CGF(CGM); 2186 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 2187 // Dest = (void*[n])(LHSArg); 2188 // Src = (void*[n])(RHSArg); 2189 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2190 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2191 ArgsType), CGF.getPointerAlign()); 2192 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2193 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2194 ArgsType), CGF.getPointerAlign()); 2195 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 2196 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2197 // ... 
2198 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2199 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2200 auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2201 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2202 2203 auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2204 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2205 2206 auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2207 QualType Type = VD->getType(); 2208 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2209 } 2210 CGF.FinishFunction(); 2211 return Fn; 2212 } 2213 2214 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2215 const RegionCodeGenTy &SingleOpGen, 2216 SourceLocation Loc, 2217 ArrayRef<const Expr *> CopyprivateVars, 2218 ArrayRef<const Expr *> SrcExprs, 2219 ArrayRef<const Expr *> DstExprs, 2220 ArrayRef<const Expr *> AssignmentOps) { 2221 if (!CGF.HaveInsertPoint()) 2222 return; 2223 assert(CopyprivateVars.size() == SrcExprs.size() && 2224 CopyprivateVars.size() == DstExprs.size() && 2225 CopyprivateVars.size() == AssignmentOps.size()); 2226 auto &C = CGM.getContext(); 2227 // int32 did_it = 0; 2228 // if(__kmpc_single(ident_t *, gtid)) { 2229 // SingleOpGen(); 2230 // __kmpc_end_single(ident_t *, gtid); 2231 // did_it = 1; 2232 // } 2233 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2234 // <copy_func>, did_it); 2235 2236 Address DidIt = Address::invalid(); 2237 if (!CopyprivateVars.empty()) { 2238 // int32 did_it = 0; 2239 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2240 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2241 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2242 } 2243 // Prepare arguments and build a call to __kmpc_single 2244 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2245 CommonActionTy 
Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, 2246 createRuntimeFunction(OMPRTL__kmpc_end_single), Args, 2247 /*Conditional=*/true); 2248 SingleOpGen.setAction(Action); 2249 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2250 if (DidIt.isValid()) { 2251 // did_it = 1; 2252 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2253 } 2254 Action.Done(CGF); 2255 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2256 // <copy_func>, did_it); 2257 if (DidIt.isValid()) { 2258 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2259 auto CopyprivateArrayTy = 2260 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 2261 /*IndexTypeQuals=*/0); 2262 // Create a list of all private variables for copyprivate. 2263 Address CopyprivateList = 2264 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2265 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2266 Address Elem = CGF.Builder.CreateConstArrayGEP( 2267 CopyprivateList, I, CGF.getPointerSize()); 2268 CGF.Builder.CreateStore( 2269 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2270 CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), 2271 Elem); 2272 } 2273 // Build function that copies private values from single region to all other 2274 // threads in the corresponding parallel region. 
2275 auto *CpyFn = emitCopyprivateCopyFunction( 2276 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 2277 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps); 2278 auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2279 Address CL = 2280 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 2281 CGF.VoidPtrTy); 2282 auto *DidItVal = CGF.Builder.CreateLoad(DidIt); 2283 llvm::Value *Args[] = { 2284 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2285 getThreadID(CGF, Loc), // i32 <gtid> 2286 BufSize, // size_t <buf_size> 2287 CL.getPointer(), // void *<copyprivate list> 2288 CpyFn, // void (*) (void *, void *) <copy_func> 2289 DidItVal // i32 did_it 2290 }; 2291 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 2292 } 2293 } 2294 2295 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2296 const RegionCodeGenTy &OrderedOpGen, 2297 SourceLocation Loc, bool IsThreads) { 2298 if (!CGF.HaveInsertPoint()) 2299 return; 2300 // __kmpc_ordered(ident_t *, gtid); 2301 // OrderedOpGen(); 2302 // __kmpc_end_ordered(ident_t *, gtid); 2303 // Prepare arguments and build a call to __kmpc_ordered 2304 if (IsThreads) { 2305 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2306 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, 2307 createRuntimeFunction(OMPRTL__kmpc_end_ordered), 2308 Args); 2309 OrderedOpGen.setAction(Action); 2310 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2311 return; 2312 } 2313 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2314 } 2315 2316 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2317 OpenMPDirectiveKind Kind, bool EmitChecks, 2318 bool ForceSimpleCall) { 2319 if (!CGF.HaveInsertPoint()) 2320 return; 2321 // Build call __kmpc_cancel_barrier(loc, thread_id); 2322 // Build call __kmpc_barrier(loc, thread_id); 2323 unsigned Flags; 2324 if (Kind == OMPD_for) 2325 Flags = 
OMP_IDENT_BARRIER_IMPL_FOR; 2326 else if (Kind == OMPD_sections) 2327 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2328 else if (Kind == OMPD_single) 2329 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2330 else if (Kind == OMPD_barrier) 2331 Flags = OMP_IDENT_BARRIER_EXPL; 2332 else 2333 Flags = OMP_IDENT_BARRIER_IMPL; 2334 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2335 // thread_id); 2336 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2337 getThreadID(CGF, Loc)}; 2338 if (auto *OMPRegionInfo = 2339 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 2340 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2341 auto *Result = CGF.EmitRuntimeCall( 2342 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 2343 if (EmitChecks) { 2344 // if (__kmpc_cancel_barrier()) { 2345 // exit from construct; 2346 // } 2347 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2348 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 2349 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 2350 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2351 CGF.EmitBlock(ExitBB); 2352 // exit from construct; 2353 auto CancelDestination = 2354 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2355 CGF.EmitBranchThroughCleanup(CancelDestination); 2356 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2357 } 2358 return; 2359 } 2360 } 2361 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 2362 } 2363 2364 /// \brief Map the OpenMP loop schedule to the runtime enumeration. 2365 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2366 bool Chunked, bool Ordered) { 2367 switch (ScheduleKind) { 2368 case OMPC_SCHEDULE_static: 2369 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2370 : (Ordered ? OMP_ord_static : OMP_sch_static); 2371 case OMPC_SCHEDULE_dynamic: 2372 return Ordered ? 
OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2373 case OMPC_SCHEDULE_guided: 2374 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2375 case OMPC_SCHEDULE_runtime: 2376 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2377 case OMPC_SCHEDULE_auto: 2378 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2379 case OMPC_SCHEDULE_unknown: 2380 assert(!Chunked && "chunk was specified but schedule kind not known"); 2381 return Ordered ? OMP_ord_static : OMP_sch_static; 2382 } 2383 llvm_unreachable("Unexpected runtime schedule"); 2384 } 2385 2386 /// \brief Map the OpenMP distribute schedule to the runtime enumeration. 2387 static OpenMPSchedType 2388 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2389 // only static is allowed for dist_schedule 2390 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2391 } 2392 2393 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2394 bool Chunked) const { 2395 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2396 return Schedule == OMP_sch_static; 2397 } 2398 2399 bool CGOpenMPRuntime::isStaticNonchunked( 2400 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2401 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2402 return Schedule == OMP_dist_sch_static; 2403 } 2404 2405 2406 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2407 auto Schedule = 2408 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2409 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2410 return Schedule != OMP_sch_static; 2411 } 2412 2413 static int addMonoNonMonoModifier(OpenMPSchedType Schedule, 2414 OpenMPScheduleClauseModifier M1, 2415 OpenMPScheduleClauseModifier M2) { 2416 int Modifier = 0; 2417 switch (M1) { 2418 case OMPC_SCHEDULE_MODIFIER_monotonic: 2419 Modifier = OMP_sch_modifier_monotonic; 2420 break; 2421 case 
OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2422 Modifier = OMP_sch_modifier_nonmonotonic; 2423 break; 2424 case OMPC_SCHEDULE_MODIFIER_simd: 2425 if (Schedule == OMP_sch_static_chunked) 2426 Schedule = OMP_sch_static_balanced_chunked; 2427 break; 2428 case OMPC_SCHEDULE_MODIFIER_last: 2429 case OMPC_SCHEDULE_MODIFIER_unknown: 2430 break; 2431 } 2432 switch (M2) { 2433 case OMPC_SCHEDULE_MODIFIER_monotonic: 2434 Modifier = OMP_sch_modifier_monotonic; 2435 break; 2436 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2437 Modifier = OMP_sch_modifier_nonmonotonic; 2438 break; 2439 case OMPC_SCHEDULE_MODIFIER_simd: 2440 if (Schedule == OMP_sch_static_chunked) 2441 Schedule = OMP_sch_static_balanced_chunked; 2442 break; 2443 case OMPC_SCHEDULE_MODIFIER_last: 2444 case OMPC_SCHEDULE_MODIFIER_unknown: 2445 break; 2446 } 2447 return Schedule | Modifier; 2448 } 2449 2450 void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF, 2451 SourceLocation Loc, 2452 const OpenMPScheduleTy &ScheduleKind, 2453 unsigned IVSize, bool IVSigned, 2454 bool Ordered, llvm::Value *UB, 2455 llvm::Value *Chunk) { 2456 if (!CGF.HaveInsertPoint()) 2457 return; 2458 OpenMPSchedType Schedule = 2459 getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); 2460 assert(Ordered || 2461 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2462 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2463 Schedule != OMP_sch_static_balanced_chunked)); 2464 // Call __kmpc_dispatch_init( 2465 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2466 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2467 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2468 2469 // If the Chunk was not specified in the clause - use default value 1. 
2470 if (Chunk == nullptr) 2471 Chunk = CGF.Builder.getIntN(IVSize, 1); 2472 llvm::Value *Args[] = { 2473 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2474 CGF.Builder.getInt32(addMonoNonMonoModifier( 2475 Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2476 CGF.Builder.getIntN(IVSize, 0), // Lower 2477 UB, // Upper 2478 CGF.Builder.getIntN(IVSize, 1), // Stride 2479 Chunk // Chunk 2480 }; 2481 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2482 } 2483 2484 static void emitForStaticInitCall( 2485 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2486 llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule, 2487 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2488 unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB, 2489 Address ST, llvm::Value *Chunk) { 2490 if (!CGF.HaveInsertPoint()) 2491 return; 2492 2493 assert(!Ordered); 2494 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2495 Schedule == OMP_sch_static_balanced_chunked || 2496 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2497 Schedule == OMP_dist_sch_static || 2498 Schedule == OMP_dist_sch_static_chunked); 2499 2500 // Call __kmpc_for_static_init( 2501 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2502 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2503 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2504 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2505 if (Chunk == nullptr) { 2506 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2507 Schedule == OMP_dist_sch_static) && 2508 "expected static non-chunked schedule"); 2509 // If the Chunk was not specified in the clause - use default value 1. 
2510 Chunk = CGF.Builder.getIntN(IVSize, 1); 2511 } else { 2512 assert((Schedule == OMP_sch_static_chunked || 2513 Schedule == OMP_sch_static_balanced_chunked || 2514 Schedule == OMP_ord_static_chunked || 2515 Schedule == OMP_dist_sch_static_chunked) && 2516 "expected static chunked schedule"); 2517 } 2518 llvm::Value *Args[] = { 2519 UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier( 2520 Schedule, M1, M2)), // Schedule type 2521 IL.getPointer(), // &isLastIter 2522 LB.getPointer(), // &LB 2523 UB.getPointer(), // &UB 2524 ST.getPointer(), // &Stride 2525 CGF.Builder.getIntN(IVSize, 1), // Incr 2526 Chunk // Chunk 2527 }; 2528 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2529 } 2530 2531 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2532 SourceLocation Loc, 2533 const OpenMPScheduleTy &ScheduleKind, 2534 unsigned IVSize, bool IVSigned, 2535 bool Ordered, Address IL, Address LB, 2536 Address UB, Address ST, 2537 llvm::Value *Chunk) { 2538 OpenMPSchedType ScheduleNum = 2539 getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); 2540 auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); 2541 auto *ThreadId = getThreadID(CGF, Loc); 2542 auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); 2543 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2544 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize, 2545 Ordered, IL, LB, UB, ST, Chunk); 2546 } 2547 2548 void CGOpenMPRuntime::emitDistributeStaticInit( 2549 CodeGenFunction &CGF, SourceLocation Loc, 2550 OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned, 2551 bool Ordered, Address IL, Address LB, Address UB, Address ST, 2552 llvm::Value *Chunk) { 2553 OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr); 2554 auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); 2555 auto *ThreadId = getThreadID(CGF, Loc); 2556 auto *StaticInitFunction = createForStaticInitFunction(IVSize, 
IVSigned); 2557 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2558 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2559 OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB, 2560 UB, ST, Chunk); 2561 } 2562 2563 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2564 SourceLocation Loc) { 2565 if (!CGF.HaveInsertPoint()) 2566 return; 2567 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2568 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2569 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 2570 Args); 2571 } 2572 2573 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2574 SourceLocation Loc, 2575 unsigned IVSize, 2576 bool IVSigned) { 2577 if (!CGF.HaveInsertPoint()) 2578 return; 2579 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2580 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2581 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2582 } 2583 2584 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2585 SourceLocation Loc, unsigned IVSize, 2586 bool IVSigned, Address IL, 2587 Address LB, Address UB, 2588 Address ST) { 2589 // Call __kmpc_dispatch_next( 2590 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2591 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2592 // kmp_int[32|64] *p_stride); 2593 llvm::Value *Args[] = { 2594 emitUpdateLocation(CGF, Loc), 2595 getThreadID(CGF, Loc), 2596 IL.getPointer(), // &isLastIter 2597 LB.getPointer(), // &Lower 2598 UB.getPointer(), // &Upper 2599 ST.getPointer() // &Stride 2600 }; 2601 llvm::Value *Call = 2602 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2603 return CGF.EmitScalarConversion( 2604 Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true), 2605 CGF.getContext().BoolTy, Loc); 2606 } 2607 2608 void 
CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2609 llvm::Value *NumThreads, 2610 SourceLocation Loc) { 2611 if (!CGF.HaveInsertPoint()) 2612 return; 2613 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2614 llvm::Value *Args[] = { 2615 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2616 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 2617 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 2618 Args); 2619 } 2620 2621 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2622 OpenMPProcBindClauseKind ProcBind, 2623 SourceLocation Loc) { 2624 if (!CGF.HaveInsertPoint()) 2625 return; 2626 // Constants for proc bind value accepted by the runtime. 2627 enum ProcBindTy { 2628 ProcBindFalse = 0, 2629 ProcBindTrue, 2630 ProcBindMaster, 2631 ProcBindClose, 2632 ProcBindSpread, 2633 ProcBindIntel, 2634 ProcBindDefault 2635 } RuntimeProcBind; 2636 switch (ProcBind) { 2637 case OMPC_PROC_BIND_master: 2638 RuntimeProcBind = ProcBindMaster; 2639 break; 2640 case OMPC_PROC_BIND_close: 2641 RuntimeProcBind = ProcBindClose; 2642 break; 2643 case OMPC_PROC_BIND_spread: 2644 RuntimeProcBind = ProcBindSpread; 2645 break; 2646 case OMPC_PROC_BIND_unknown: 2647 llvm_unreachable("Unsupported proc_bind value."); 2648 } 2649 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2650 llvm::Value *Args[] = { 2651 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2652 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 2653 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 2654 } 2655 2656 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2657 SourceLocation Loc) { 2658 if (!CGF.HaveInsertPoint()) 2659 return; 2660 // Build call void __kmpc_flush(ident_t *loc) 2661 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 2662 emitUpdateLocation(CGF, Loc)); 2663 } 2664 2665 namespace { 2666 /// \brief 
Indexes of fields for type kmp_task_t. 2667 enum KmpTaskTFields { 2668 /// \brief List of shared variables. 2669 KmpTaskTShareds, 2670 /// \brief Task routine. 2671 KmpTaskTRoutine, 2672 /// \brief Partition id for the untied tasks. 2673 KmpTaskTPartId, 2674 /// Function with call of destructors for private variables. 2675 Data1, 2676 /// Task priority. 2677 Data2, 2678 /// (Taskloops only) Lower bound. 2679 KmpTaskTLowerBound, 2680 /// (Taskloops only) Upper bound. 2681 KmpTaskTUpperBound, 2682 /// (Taskloops only) Stride. 2683 KmpTaskTStride, 2684 /// (Taskloops only) Is last iteration flag. 2685 KmpTaskTLastIter, 2686 }; 2687 } // anonymous namespace 2688 2689 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2690 // FIXME: Add other entries type when they become supported. 2691 return OffloadEntriesTargetRegion.empty(); 2692 } 2693 2694 /// \brief Initialize target region entry. 2695 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2696 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2697 StringRef ParentName, unsigned LineNum, 2698 unsigned Order) { 2699 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2700 "only required for the device " 2701 "code generation."); 2702 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2703 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr); 2704 ++OffloadingEntriesNum; 2705 } 2706 2707 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2708 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2709 StringRef ParentName, unsigned LineNum, 2710 llvm::Constant *Addr, llvm::Constant *ID) { 2711 // If we are emitting code for a target, the entry is already initialized, 2712 // only has to be registered. 
2713 if (CGM.getLangOpts().OpenMPIsDevice) { 2714 assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 2715 "Entry must exist."); 2716 auto &Entry = 2717 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 2718 assert(Entry.isValid() && "Entry not initialized!"); 2719 Entry.setAddress(Addr); 2720 Entry.setID(ID); 2721 return; 2722 } else { 2723 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID); 2724 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 2725 } 2726 } 2727 2728 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 2729 unsigned DeviceID, unsigned FileID, StringRef ParentName, 2730 unsigned LineNum) const { 2731 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 2732 if (PerDevice == OffloadEntriesTargetRegion.end()) 2733 return false; 2734 auto PerFile = PerDevice->second.find(FileID); 2735 if (PerFile == PerDevice->second.end()) 2736 return false; 2737 auto PerParentName = PerFile->second.find(ParentName); 2738 if (PerParentName == PerFile->second.end()) 2739 return false; 2740 auto PerLine = PerParentName->second.find(LineNum); 2741 if (PerLine == PerParentName->second.end()) 2742 return false; 2743 // Fail if this entry is already registered. 2744 if (PerLine->second.getAddress() || PerLine->second.getID()) 2745 return false; 2746 return true; 2747 } 2748 2749 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 2750 const OffloadTargetRegionEntryInfoActTy &Action) { 2751 // Scan all target region entries and perform the provided action. 2752 for (auto &D : OffloadEntriesTargetRegion) 2753 for (auto &F : D.second) 2754 for (auto &P : F.second) 2755 for (auto &L : P.second) 2756 Action(D.first, F.first, P.first(), L.first, L.second); 2757 } 2758 2759 /// \brief Create a Ctor/Dtor-like function whose body is emitted through 2760 /// \a Codegen. 
This is used to emit the two functions that register and 2761 /// unregister the descriptor of the current compilation unit. 2762 static llvm::Function * 2763 createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name, 2764 const RegionCodeGenTy &Codegen) { 2765 auto &C = CGM.getContext(); 2766 FunctionArgList Args; 2767 ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(), 2768 /*Id=*/nullptr, C.VoidPtrTy); 2769 Args.push_back(&DummyPtr); 2770 2771 CodeGenFunction CGF(CGM); 2772 GlobalDecl(); 2773 auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2774 auto FTy = CGM.getTypes().GetFunctionType(FI); 2775 auto *Fn = 2776 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation()); 2777 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation()); 2778 Codegen(CGF); 2779 CGF.FinishFunction(); 2780 return Fn; 2781 } 2782 2783 llvm::Function * 2784 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { 2785 2786 // If we don't have entries or if we are emitting code for the device, we 2787 // don't need to do anything. 2788 if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) 2789 return nullptr; 2790 2791 auto &M = CGM.getModule(); 2792 auto &C = CGM.getContext(); 2793 2794 // Get list of devices we care about 2795 auto &Devices = CGM.getLangOpts().OMPTargetTriples; 2796 2797 // We should be creating an offloading descriptor only if there are devices 2798 // specified. 2799 assert(!Devices.empty() && "No OpenMP offloading devices??"); 2800 2801 // Create the external variables that will point to the begin and end of the 2802 // host entries section. These will be defined by the linker. 
2803 auto *OffloadEntryTy = 2804 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); 2805 llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable( 2806 M, OffloadEntryTy, /*isConstant=*/true, 2807 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 2808 ".omp_offloading.entries_begin"); 2809 llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable( 2810 M, OffloadEntryTy, /*isConstant=*/true, 2811 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 2812 ".omp_offloading.entries_end"); 2813 2814 // Create all device images 2815 auto *DeviceImageTy = cast<llvm::StructType>( 2816 CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); 2817 ConstantInitBuilder DeviceImagesBuilder(CGM); 2818 auto DeviceImagesEntries = DeviceImagesBuilder.beginArray(DeviceImageTy); 2819 2820 for (unsigned i = 0; i < Devices.size(); ++i) { 2821 StringRef T = Devices[i].getTriple(); 2822 auto *ImgBegin = new llvm::GlobalVariable( 2823 M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, 2824 /*Initializer=*/nullptr, 2825 Twine(".omp_offloading.img_start.") + Twine(T)); 2826 auto *ImgEnd = new llvm::GlobalVariable( 2827 M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, 2828 /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T)); 2829 2830 auto Dev = DeviceImagesEntries.beginStruct(DeviceImageTy); 2831 Dev.add(ImgBegin); 2832 Dev.add(ImgEnd); 2833 Dev.add(HostEntriesBegin); 2834 Dev.add(HostEntriesEnd); 2835 Dev.finishAndAddTo(DeviceImagesEntries); 2836 } 2837 2838 // Create device images global array. 
2839 llvm::GlobalVariable *DeviceImages = 2840 DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images", 2841 CGM.getPointerAlign(), 2842 /*isConstant=*/true); 2843 DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 2844 2845 // This is a Zero array to be used in the creation of the constant expressions 2846 llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), 2847 llvm::Constant::getNullValue(CGM.Int32Ty)}; 2848 2849 // Create the target region descriptor. 2850 auto *BinaryDescriptorTy = cast<llvm::StructType>( 2851 CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy())); 2852 ConstantInitBuilder DescBuilder(CGM); 2853 auto DescInit = DescBuilder.beginStruct(BinaryDescriptorTy); 2854 DescInit.addInt(CGM.Int32Ty, Devices.size()); 2855 DescInit.add(llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), 2856 DeviceImages, 2857 Index)); 2858 DescInit.add(HostEntriesBegin); 2859 DescInit.add(HostEntriesEnd); 2860 2861 auto *Desc = DescInit.finishAndCreateGlobal(".omp_offloading.descriptor", 2862 CGM.getPointerAlign(), 2863 /*isConstant=*/true); 2864 2865 // Emit code to register or unregister the descriptor at execution 2866 // startup or closing, respectively. 2867 2868 // Create a variable to drive the registration and unregistration of the 2869 // descriptor, so we can reuse the logic that emits Ctors and Dtors. 
2870 auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var"); 2871 ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(), 2872 IdentInfo, C.CharTy); 2873 2874 auto *UnRegFn = createOffloadingBinaryDescriptorFunction( 2875 CGM, ".omp_offloading.descriptor_unreg", 2876 [&](CodeGenFunction &CGF, PrePostActionTy &) { 2877 CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib), 2878 Desc); 2879 }); 2880 auto *RegFn = createOffloadingBinaryDescriptorFunction( 2881 CGM, ".omp_offloading.descriptor_reg", 2882 [&](CodeGenFunction &CGF, PrePostActionTy &) { 2883 CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib), 2884 Desc); 2885 CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); 2886 }); 2887 return RegFn; 2888 } 2889 2890 void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID, 2891 llvm::Constant *Addr, uint64_t Size) { 2892 StringRef Name = Addr->getName(); 2893 auto *TgtOffloadEntryType = cast<llvm::StructType>( 2894 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy())); 2895 llvm::LLVMContext &C = CGM.getModule().getContext(); 2896 llvm::Module &M = CGM.getModule(); 2897 2898 // Make sure the address has the right type. 2899 llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy); 2900 2901 // Create constant string with the name. 2902 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 2903 2904 llvm::GlobalVariable *Str = 2905 new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true, 2906 llvm::GlobalValue::InternalLinkage, StrPtrInit, 2907 ".omp_offloading.entry_name"); 2908 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 2909 llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy); 2910 2911 // We can't have any padding between symbols, so we need to have 1-byte 2912 // alignment. 2913 auto Align = CharUnits::fromQuantity(1); 2914 2915 // Create the entry struct. 
2916 ConstantInitBuilder EntryBuilder(CGM); 2917 auto EntryInit = EntryBuilder.beginStruct(TgtOffloadEntryType); 2918 EntryInit.add(AddrPtr); 2919 EntryInit.add(StrPtr); 2920 EntryInit.addInt(CGM.SizeTy, Size); 2921 llvm::GlobalVariable *Entry = 2922 EntryInit.finishAndCreateGlobal(".omp_offloading.entry", 2923 Align, 2924 /*constant*/ true, 2925 llvm::GlobalValue::ExternalLinkage); 2926 2927 // The entry has to be created in the section the linker expects it to be. 2928 Entry->setSection(".omp_offloading.entries"); 2929 } 2930 2931 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 2932 // Emit the offloading entries and metadata so that the device codegen side 2933 // can 2934 // easily figure out what to emit. The produced metadata looks like this: 2935 // 2936 // !omp_offload.info = !{!1, ...} 2937 // 2938 // Right now we only generate metadata for function that contain target 2939 // regions. 2940 2941 // If we do not have entries, we dont need to do anything. 2942 if (OffloadEntriesInfoManager.empty()) 2943 return; 2944 2945 llvm::Module &M = CGM.getModule(); 2946 llvm::LLVMContext &C = M.getContext(); 2947 SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> 2948 OrderedEntries(OffloadEntriesInfoManager.size()); 2949 2950 // Create the offloading info metadata node. 2951 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 2952 2953 // Auxiliar methods to create metadata values and strings. 
2954 auto getMDInt = [&](unsigned v) { 2955 return llvm::ConstantAsMetadata::get( 2956 llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v)); 2957 }; 2958 2959 auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); }; 2960 2961 // Create function that emits metadata for each target region entry; 2962 auto &&TargetRegionMetadataEmitter = [&]( 2963 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line, 2964 OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 2965 llvm::SmallVector<llvm::Metadata *, 32> Ops; 2966 // Generate metadata for target regions. Each entry of this metadata 2967 // contains: 2968 // - Entry 0 -> Kind of this type of metadata (0). 2969 // - Entry 1 -> Device ID of the file where the entry was identified. 2970 // - Entry 2 -> File ID of the file where the entry was identified. 2971 // - Entry 3 -> Mangled name of the function where the entry was identified. 2972 // - Entry 4 -> Line in the file where the entry was identified. 2973 // - Entry 5 -> Order the entry was created. 2974 // The first element of the metadata node is the kind. 2975 Ops.push_back(getMDInt(E.getKind())); 2976 Ops.push_back(getMDInt(DeviceID)); 2977 Ops.push_back(getMDInt(FileID)); 2978 Ops.push_back(getMDString(ParentName)); 2979 Ops.push_back(getMDInt(Line)); 2980 Ops.push_back(getMDInt(E.getOrder())); 2981 2982 // Save this entry in the right position of the ordered entries array. 2983 OrderedEntries[E.getOrder()] = &E; 2984 2985 // Add metadata to the named metadata node. 
2986 MD->addOperand(llvm::MDNode::get(C, Ops)); 2987 }; 2988 2989 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 2990 TargetRegionMetadataEmitter); 2991 2992 for (auto *E : OrderedEntries) { 2993 assert(E && "All ordered entries must exist!"); 2994 if (auto *CE = 2995 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 2996 E)) { 2997 assert(CE->getID() && CE->getAddress() && 2998 "Entry ID and Addr are invalid!"); 2999 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0); 3000 } else 3001 llvm_unreachable("Unsupported entry kind."); 3002 } 3003 } 3004 3005 /// \brief Loads all the offload entries information from the host IR 3006 /// metadata. 3007 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3008 // If we are in target mode, load the metadata from the host IR. This code has 3009 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 3010 3011 if (!CGM.getLangOpts().OpenMPIsDevice) 3012 return; 3013 3014 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3015 return; 3016 3017 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3018 if (Buf.getError()) 3019 return; 3020 3021 llvm::LLVMContext C; 3022 auto ME = expectedToErrorOrAndEmitErrors( 3023 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3024 3025 if (ME.getError()) 3026 return; 3027 3028 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3029 if (!MD) 3030 return; 3031 3032 for (auto I : MD->operands()) { 3033 llvm::MDNode *MN = cast<llvm::MDNode>(I); 3034 3035 auto getMDInt = [&](unsigned Idx) { 3036 llvm::ConstantAsMetadata *V = 3037 cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3038 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3039 }; 3040 3041 auto getMDString = [&](unsigned Idx) { 3042 llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3043 return V->getString(); 3044 }; 3045 3046 switch (getMDInt(0)) { 3047 default: 3048 llvm_unreachable("Unexpected 
metadata!"); 3049 break; 3050 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3051 OFFLOAD_ENTRY_INFO_TARGET_REGION: 3052 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3053 /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2), 3054 /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4), 3055 /*Order=*/getMDInt(5)); 3056 break; 3057 } 3058 } 3059 } 3060 3061 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3062 if (!KmpRoutineEntryPtrTy) { 3063 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3064 auto &C = CGM.getContext(); 3065 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3066 FunctionProtoType::ExtProtoInfo EPI; 3067 KmpRoutineEntryPtrQTy = C.getPointerType( 3068 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3069 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3070 } 3071 } 3072 3073 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 3074 QualType FieldTy) { 3075 auto *Field = FieldDecl::Create( 3076 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 3077 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 3078 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 3079 Field->setAccess(AS_public); 3080 DC->addDecl(Field); 3081 return Field; 3082 } 3083 3084 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3085 3086 // Make sure the type of the entry is already created. This is the type we 3087 // have to create: 3088 // struct __tgt_offload_entry{ 3089 // void *addr; // Pointer to the offload entry info. 3090 // // (function or global) 3091 // char *name; // Name of the function or global. 3092 // size_t size; // Size of the entry info (0 if it a function). 
3093 // }; 3094 if (TgtOffloadEntryQTy.isNull()) { 3095 ASTContext &C = CGM.getContext(); 3096 auto *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3097 RD->startDefinition(); 3098 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3099 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3100 addFieldToRecordDecl(C, RD, C.getSizeType()); 3101 RD->completeDefinition(); 3102 TgtOffloadEntryQTy = C.getRecordType(RD); 3103 } 3104 return TgtOffloadEntryQTy; 3105 } 3106 3107 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { 3108 // These are the types we need to build: 3109 // struct __tgt_device_image{ 3110 // void *ImageStart; // Pointer to the target code start. 3111 // void *ImageEnd; // Pointer to the target code end. 3112 // // We also add the host entries to the device image, as it may be useful 3113 // // for the target runtime to have access to that information. 3114 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all 3115 // // the entries. 3116 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 3117 // // entries (non inclusive). 3118 // }; 3119 if (TgtDeviceImageQTy.isNull()) { 3120 ASTContext &C = CGM.getContext(); 3121 auto *RD = C.buildImplicitRecord("__tgt_device_image"); 3122 RD->startDefinition(); 3123 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3124 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3125 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3126 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3127 RD->completeDefinition(); 3128 TgtDeviceImageQTy = C.getRecordType(RD); 3129 } 3130 return TgtDeviceImageQTy; 3131 } 3132 3133 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { 3134 // struct __tgt_bin_desc{ 3135 // int32_t NumDevices; // Number of devices supported. 3136 // __tgt_device_image *DeviceImages; // Arrays of device images 3137 // // (one per device). 3138 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the 3139 // // entries. 
3140 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 3141 // // entries (non inclusive). 3142 // }; 3143 if (TgtBinaryDescriptorQTy.isNull()) { 3144 ASTContext &C = CGM.getContext(); 3145 auto *RD = C.buildImplicitRecord("__tgt_bin_desc"); 3146 RD->startDefinition(); 3147 addFieldToRecordDecl( 3148 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3149 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); 3150 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3151 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3152 RD->completeDefinition(); 3153 TgtBinaryDescriptorQTy = C.getRecordType(RD); 3154 } 3155 return TgtBinaryDescriptorQTy; 3156 } 3157 3158 namespace { 3159 struct PrivateHelpersTy { 3160 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 3161 const VarDecl *PrivateElemInit) 3162 : Original(Original), PrivateCopy(PrivateCopy), 3163 PrivateElemInit(PrivateElemInit) {} 3164 const VarDecl *Original; 3165 const VarDecl *PrivateCopy; 3166 const VarDecl *PrivateElemInit; 3167 }; 3168 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3169 } // anonymous namespace 3170 3171 static RecordDecl * 3172 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3173 if (!Privates.empty()) { 3174 auto &C = CGM.getContext(); 3175 // Build struct .kmp_privates_t. 
{ 3176 // /* private vars */ 3177 // }; 3178 auto *RD = C.buildImplicitRecord(".kmp_privates.t"); 3179 RD->startDefinition(); 3180 for (auto &&Pair : Privates) { 3181 auto *VD = Pair.second.Original; 3182 auto Type = VD->getType(); 3183 Type = Type.getNonReferenceType(); 3184 auto *FD = addFieldToRecordDecl(C, RD, Type); 3185 if (VD->hasAttrs()) { 3186 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3187 E(VD->getAttrs().end()); 3188 I != E; ++I) 3189 FD->addAttr(*I); 3190 } 3191 } 3192 RD->completeDefinition(); 3193 return RD; 3194 } 3195 return nullptr; 3196 } 3197 3198 static RecordDecl * 3199 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3200 QualType KmpInt32Ty, 3201 QualType KmpRoutineEntryPointerQTy) { 3202 auto &C = CGM.getContext(); 3203 // Build struct kmp_task_t { 3204 // void * shareds; 3205 // kmp_routine_entry_t routine; 3206 // kmp_int32 part_id; 3207 // kmp_cmplrdata_t data1; 3208 // kmp_cmplrdata_t data2; 3209 // For taskloops additional fields: 3210 // kmp_uint64 lb; 3211 // kmp_uint64 ub; 3212 // kmp_int64 st; 3213 // kmp_int32 liter; 3214 // }; 3215 auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3216 UD->startDefinition(); 3217 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3218 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3219 UD->completeDefinition(); 3220 QualType KmpCmplrdataTy = C.getRecordType(UD); 3221 auto *RD = C.buildImplicitRecord("kmp_task_t"); 3222 RD->startDefinition(); 3223 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3224 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3225 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3226 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3227 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3228 if (isOpenMPTaskLoopDirective(Kind)) { 3229 QualType KmpUInt64Ty = 3230 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3231 QualType KmpInt64Ty = 3232 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, 
/*Signed=*/1); 3233 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3234 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3235 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3236 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3237 } 3238 RD->completeDefinition(); 3239 return RD; 3240 } 3241 3242 static RecordDecl * 3243 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3244 ArrayRef<PrivateDataTy> Privates) { 3245 auto &C = CGM.getContext(); 3246 // Build struct kmp_task_t_with_privates { 3247 // kmp_task_t task_data; 3248 // .kmp_privates_t. privates; 3249 // }; 3250 auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3251 RD->startDefinition(); 3252 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3253 if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) { 3254 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3255 } 3256 RD->completeDefinition(); 3257 return RD; 3258 } 3259 3260 /// \brief Emit a proxy function which accepts kmp_task_t as the second 3261 /// argument. 
3262 /// \code 3263 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3264 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3265 /// For taskloops: 3266 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3267 /// tt->shareds); 3268 /// return 0; 3269 /// } 3270 /// \endcode 3271 static llvm::Value * 3272 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3273 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3274 QualType KmpTaskTWithPrivatesPtrQTy, 3275 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3276 QualType SharedsPtrTy, llvm::Value *TaskFunction, 3277 llvm::Value *TaskPrivatesMap) { 3278 auto &C = CGM.getContext(); 3279 FunctionArgList Args; 3280 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); 3281 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, 3282 /*Id=*/nullptr, 3283 KmpTaskTWithPrivatesPtrQTy.withRestrict()); 3284 Args.push_back(&GtidArg); 3285 Args.push_back(&TaskTypeArg); 3286 auto &TaskEntryFnInfo = 3287 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3288 auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3289 auto *TaskEntry = 3290 llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage, 3291 ".omp_task_entry.", &CGM.getModule()); 3292 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo); 3293 CodeGenFunction CGF(CGM); 3294 CGF.disableDebugInfo(); 3295 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); 3296 3297 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3298 // tt, 3299 // For taskloops: 3300 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3301 // tt->task_data.shareds); 3302 auto *GtidParam = CGF.EmitLoadOfScalar( 3303 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3304 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3305 
CGF.GetAddrOfLocalVar(&TaskTypeArg), 3306 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3307 auto *KmpTaskTWithPrivatesQTyRD = 3308 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3309 LValue Base = 3310 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3311 auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3312 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3313 auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3314 auto *PartidParam = PartIdLVal.getPointer(); 3315 3316 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3317 auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3318 auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3319 CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(), 3320 CGF.ConvertTypeForMem(SharedsPtrTy)); 3321 3322 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3323 llvm::Value *PrivatesParam; 3324 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3325 auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3326 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3327 PrivatesLVal.getPointer(), CGF.VoidPtrTy); 3328 } else 3329 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3330 3331 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3332 TaskPrivatesMap, 3333 CGF.Builder 3334 .CreatePointerBitCastOrAddrSpaceCast( 3335 TDBase.getAddress(), CGF.VoidPtrTy) 3336 .getPointer()}; 3337 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3338 std::end(CommonArgs)); 3339 if (isOpenMPTaskLoopDirective(Kind)) { 3340 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3341 auto LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3342 auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal(); 3343 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3344 auto 
UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3345 auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal(); 3346 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3347 auto StLVal = CGF.EmitLValueForField(Base, *StFI); 3348 auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal(); 3349 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3350 auto LILVal = CGF.EmitLValueForField(Base, *LIFI); 3351 auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal(); 3352 CallArgs.push_back(LBParam); 3353 CallArgs.push_back(UBParam); 3354 CallArgs.push_back(StParam); 3355 CallArgs.push_back(LIParam); 3356 } 3357 CallArgs.push_back(SharedsParam); 3358 3359 CGF.EmitCallOrInvoke(TaskFunction, CallArgs); 3360 CGF.EmitStoreThroughLValue( 3361 RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3362 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3363 CGF.FinishFunction(); 3364 return TaskEntry; 3365 } 3366 3367 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3368 SourceLocation Loc, 3369 QualType KmpInt32Ty, 3370 QualType KmpTaskTWithPrivatesPtrQTy, 3371 QualType KmpTaskTWithPrivatesQTy) { 3372 auto &C = CGM.getContext(); 3373 FunctionArgList Args; 3374 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); 3375 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, 3376 /*Id=*/nullptr, 3377 KmpTaskTWithPrivatesPtrQTy.withRestrict()); 3378 Args.push_back(&GtidArg); 3379 Args.push_back(&TaskTypeArg); 3380 FunctionType::ExtInfo Info; 3381 auto &DestructorFnInfo = 3382 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3383 auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo); 3384 auto *DestructorFn = 3385 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3386 ".omp_task_destructor.", &CGM.getModule()); 3387 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn, 3388 DestructorFnInfo); 3389 CodeGenFunction CGF(CGM); 3390 
CGF.disableDebugInfo(); 3391 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3392 Args); 3393 3394 LValue Base = CGF.EmitLoadOfPointerLValue( 3395 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3396 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3397 auto *KmpTaskTWithPrivatesQTyRD = 3398 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3399 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3400 Base = CGF.EmitLValueForField(Base, *FI); 3401 for (auto *Field : 3402 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3403 if (auto DtorKind = Field->getType().isDestructedType()) { 3404 auto FieldLValue = CGF.EmitLValueForField(Base, Field); 3405 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 3406 } 3407 } 3408 CGF.FinishFunction(); 3409 return DestructorFn; 3410 } 3411 3412 /// \brief Emit a privates mapping function for correct handling of private and 3413 /// firstprivate variables. 3414 /// \code 3415 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 3416 /// **noalias priv1,..., <tyn> **noalias privn) { 3417 /// *priv1 = &.privates.priv1; 3418 /// ...; 3419 /// *privn = &.privates.privn; 3420 /// } 3421 /// \endcode 3422 static llvm::Value * 3423 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3424 ArrayRef<const Expr *> PrivateVars, 3425 ArrayRef<const Expr *> FirstprivateVars, 3426 ArrayRef<const Expr *> LastprivateVars, 3427 QualType PrivatesQTy, 3428 ArrayRef<PrivateDataTy> Privates) { 3429 auto &C = CGM.getContext(); 3430 FunctionArgList Args; 3431 ImplicitParamDecl TaskPrivatesArg( 3432 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3433 C.getPointerType(PrivatesQTy).withConst().withRestrict()); 3434 Args.push_back(&TaskPrivatesArg); 3435 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 3436 unsigned Counter = 1; 3437 for (auto *E: PrivateVars) { 3438 Args.push_back(ImplicitParamDecl::Create( 3439 C, /*DC=*/nullptr, Loc, 3440 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 3441 .withConst() 3442 .withRestrict())); 3443 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3444 PrivateVarsPos[VD] = Counter; 3445 ++Counter; 3446 } 3447 for (auto *E : FirstprivateVars) { 3448 Args.push_back(ImplicitParamDecl::Create( 3449 C, /*DC=*/nullptr, Loc, 3450 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 3451 .withConst() 3452 .withRestrict())); 3453 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3454 PrivateVarsPos[VD] = Counter; 3455 ++Counter; 3456 } 3457 for (auto *E: LastprivateVars) { 3458 Args.push_back(ImplicitParamDecl::Create( 3459 C, /*DC=*/nullptr, Loc, 3460 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 3461 .withConst() 3462 .withRestrict())); 3463 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3464 PrivateVarsPos[VD] = Counter; 3465 ++Counter; 3466 } 3467 auto &TaskPrivatesMapFnInfo = 3468 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3469 auto 
*TaskPrivatesMapTy = 3470 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3471 auto *TaskPrivatesMap = llvm::Function::Create( 3472 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, 3473 ".omp_task_privates_map.", &CGM.getModule()); 3474 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap, 3475 TaskPrivatesMapFnInfo); 3476 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3477 CodeGenFunction CGF(CGM); 3478 CGF.disableDebugInfo(); 3479 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3480 TaskPrivatesMapFnInfo, Args); 3481 3482 // *privi = &.privates.privi; 3483 LValue Base = CGF.EmitLoadOfPointerLValue( 3484 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3485 TaskPrivatesArg.getType()->castAs<PointerType>()); 3486 auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3487 Counter = 0; 3488 for (auto *Field : PrivatesQTyRD->fields()) { 3489 auto FieldLVal = CGF.EmitLValueForField(Base, Field); 3490 auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3491 auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3492 auto RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3493 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 3494 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 3495 ++Counter; 3496 } 3497 CGF.FinishFunction(); 3498 return TaskPrivatesMap; 3499 } 3500 3501 static int array_pod_sort_comparator(const PrivateDataTy *P1, 3502 const PrivateDataTy *P2) { 3503 return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0); 3504 } 3505 3506 /// Emit initialization for private variables in task-based directives. 
/// \param D Directive whose associated captured statement is used to look up
/// the captured field of each original variable.
/// \param KmpTaskSharedsPtr Address of the shareds block; only read when
/// \p Data has firstprivate variables.
/// \param TDBase LValue of the kmp_task_t_with_privates record; its second
/// field is taken as the privates record.
/// \param Privates Privatized variables, walked in step with the fields of
/// the privates record.
/// \param ForDup When true, only initializers that are non-trivial
/// CXXConstructExprs are emitted; when false, every initializer is emitted.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  auto &C = CGF.getContext();
  // Second field of kmp_task_t_with_privates: the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // SrcBase (the shareds block viewed as SharedsTy) is only set up when there
  // are firstprivates; it is only used for entries with a PrivateElemInit.
  LValue SrcBase;
  if (!Data.FirstprivateVars.empty()) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
      cast<CapturedStmt>(*D.getAssociatedStmt()));
  // From here on FI iterates over the fields of the privates record itself.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (auto &&Pair : Privates) {
    auto *VD = Pair.second.PrivateCopy;
    auto *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (auto *Elem = Pair.second.PrivateElemInit) {
        // The copy is initialized from the original, which is reached through
        // its captured field in the shareds block.
        auto *OriginalVD = Pair.second.Original;
        auto *SharedField = CapturesInfo.lookup(OriginalVD);
        auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
        // Rebuild the lvalue with the declared alignment of the original.
        SharedRefLValue = CGF.MakeAddrLValue(
            Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
            SharedRefLValue.getType(), AlignmentSource::Decl);
        QualType Type = OriginalVD->getType();
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
                                    SharedRefLValue.getAddress(), Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: map Elem to the shared original, then emit the
          // initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else
        // No element-init helper: emit the initializer directly.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
    }
    // Advance to the next field even when nothing was emitted for this entry.
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
3585 static bool checkInitIsRequired(CodeGenFunction &CGF, 3586 ArrayRef<PrivateDataTy> Privates) { 3587 bool InitRequired = false; 3588 for (auto &&Pair : Privates) { 3589 auto *VD = Pair.second.PrivateCopy; 3590 auto *Init = VD->getAnyInitializer(); 3591 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3592 !CGF.isTrivialInitializer(Init)); 3593 } 3594 return InitRequired; 3595 } 3596 3597 3598 /// Emit task_dup function (for initialization of 3599 /// private/firstprivate/lastprivate vars and last_iter flag) 3600 /// \code 3601 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3602 /// lastpriv) { 3603 /// // setup lastprivate flag 3604 /// task_dst->last = lastpriv; 3605 /// // could be constructor calls here... 3606 /// } 3607 /// \endcode 3608 static llvm::Value * 3609 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3610 const OMPExecutableDirective &D, 3611 QualType KmpTaskTWithPrivatesPtrQTy, 3612 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3613 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3614 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3615 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3616 auto &C = CGM.getContext(); 3617 FunctionArgList Args; 3618 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, 3619 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); 3620 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, 3621 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); 3622 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, 3623 /*Id=*/nullptr, C.IntTy); 3624 Args.push_back(&DstArg); 3625 Args.push_back(&SrcArg); 3626 Args.push_back(&LastprivArg); 3627 auto &TaskDupFnInfo = 3628 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3629 auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3630 auto *TaskDup = 3631 llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage, 3632 ".omp_task_dup.", &CGM.getModule()); 3633 
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo); 3634 CodeGenFunction CGF(CGM); 3635 CGF.disableDebugInfo(); 3636 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args); 3637 3638 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3639 CGF.GetAddrOfLocalVar(&DstArg), 3640 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3641 // task_dst->liter = lastpriv; 3642 if (WithLastIter) { 3643 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3644 LValue Base = CGF.EmitLValueForField( 3645 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3646 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3647 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3648 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3649 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3650 } 3651 3652 // Emit initial values for private copies (if any). 3653 assert(!Privates.empty()); 3654 Address KmpTaskSharedsPtr = Address::invalid(); 3655 if (!Data.FirstprivateVars.empty()) { 3656 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3657 CGF.GetAddrOfLocalVar(&SrcArg), 3658 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3659 LValue Base = CGF.EmitLValueForField( 3660 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3661 KmpTaskSharedsPtr = Address( 3662 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 3663 Base, *std::next(KmpTaskTQTyRD->field_begin(), 3664 KmpTaskTShareds)), 3665 Loc), 3666 CGF.getNaturalTypeAlignment(SharedsTy)); 3667 } 3668 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 3669 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 3670 CGF.FinishFunction(); 3671 return TaskDup; 3672 } 3673 3674 /// Checks if destructor function is required to be generated. 3675 /// \return true if cleanups are required, false otherwise. 
/// \param KmpTaskTWithPrivatesQTyRD Record whose second field is the privates
/// record; that field is accessed unconditionally, so it must exist.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
  bool NeedsCleanup = false;
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
  // Stop at the first private field whose type needs destruction.
  for (auto *FD : PrivateRD->fields()) {
    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
    if (NeedsCleanup)
      break;
  }
  return NeedsCleanup;
}

/// Emits the allocation and initialization of a task object for directive
/// \p D: collects the privatized variables, builds the task record types,
/// emits the privates-mapping and proxy entry functions, calls
/// __kmpc_omp_task_alloc, copies the shareds, initializes the private copies
/// and fills in the destructor and priority data.
/// \param TaskFunction The outlined task function; the type of its fourth
/// argument is used as the type of the privates-mapping function pointer.
/// \param SharedsTy Record type of the shareds block.
/// \param Shareds Address the shareds are copied from.
/// \return The allocated task, the proxy entry function, the task pointer
/// cast to the with-privates record type, the lvalue of the kmp_task_t data
/// and the kmp_task_t record declaration; TaskDupFn is additionally set for
/// taskloop directives that need a duplication function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Value *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  auto &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // Each *Vars list is walked in lockstep with its *Copies list.
  auto I = Data.PrivateCopies.begin();
  for (auto *E : Data.PrivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  // Only firstprivates carry an element-init helper.
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (auto *E : Data.FirstprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (auto *E : Data.LastprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  // The comparator orders entries by decreasing alignment.
  llvm::array_pod_sort(Privates.begin(), Privates.end(),
                       array_pod_sort_comparator);
  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // NOTE(review): KmpTaskTQTy is built once, from the kind of the first
  // directive that reaches this point, and createKmpTaskTRecordDecl only adds
  // the lb/ub/st/liter fields for taskloop directives. If a plain task is
  // emitted before a taskloop, the cached record appears to lack those fields
  // -- confirm and consider a separate cache for taskloops.
  if (KmpTaskTQTy.isNull()) {
    KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
        CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
  }
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  auto *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
  auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // Type of the fourth argument (index 3) of the outlined task function.
  auto *TaskPrivatesMapTy =
      std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
                3)
          ->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null mapping function.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  auto *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // The 'final' flag is either selected at run time (when Data.Final carries
  // a value) or folded in as a constant.
  auto *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  auto *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      NewTask, KmpTaskTWithPrivatesPtrTy);
  // Base is the whole kmp_task_t_with_privates record; TDBase is its first
  // field, the kmp_task_t data.
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops get a duplication function when there are lastprivates or
    // when some private copy needs a non-trivial constructor call.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Value *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Value *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  auto &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
3896 enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 }; 3897 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 3898 RecordDecl *KmpDependInfoRD; 3899 QualType FlagsTy = 3900 C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 3901 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 3902 if (KmpDependInfoTy.isNull()) { 3903 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 3904 KmpDependInfoRD->startDefinition(); 3905 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 3906 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 3907 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 3908 KmpDependInfoRD->completeDefinition(); 3909 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 3910 } else 3911 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 3912 CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy); 3913 // Define type kmp_depend_info[<Dependences.size()>]; 3914 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 3915 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), 3916 ArrayType::Normal, /*IndexTypeQuals=*/0); 3917 // kmp_depend_info[<Dependences.size()>] deps; 3918 DependenciesArray = 3919 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 3920 for (unsigned i = 0; i < NumDependencies; ++i) { 3921 const Expr *E = Data.Dependences[i].second; 3922 auto Addr = CGF.EmitLValue(E); 3923 llvm::Value *Size; 3924 QualType Ty = E->getType(); 3925 if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 3926 LValue UpAddrLVal = 3927 CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false); 3928 llvm::Value *UpAddr = 3929 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); 3930 llvm::Value *LowIntPtr = 3931 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); 3932 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 3933 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 3934 } else 3935 Size = 
CGF.getTypeSize(Ty); 3936 auto Base = CGF.MakeAddrLValue( 3937 CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize), 3938 KmpDependInfoTy); 3939 // deps[i].base_addr = &<Dependences[i].second>; 3940 auto BaseAddrLVal = CGF.EmitLValueForField( 3941 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 3942 CGF.EmitStoreOfScalar( 3943 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), 3944 BaseAddrLVal); 3945 // deps[i].len = sizeof(<Dependences[i].second>); 3946 auto LenLVal = CGF.EmitLValueForField( 3947 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 3948 CGF.EmitStoreOfScalar(Size, LenLVal); 3949 // deps[i].flags = <Dependences[i].first>; 3950 RTLDependenceKindTy DepKind; 3951 switch (Data.Dependences[i].first) { 3952 case OMPC_DEPEND_in: 3953 DepKind = DepIn; 3954 break; 3955 // Out and InOut dependencies must use the same code. 3956 case OMPC_DEPEND_out: 3957 case OMPC_DEPEND_inout: 3958 DepKind = DepInOut; 3959 break; 3960 case OMPC_DEPEND_source: 3961 case OMPC_DEPEND_sink: 3962 case OMPC_DEPEND_unknown: 3963 llvm_unreachable("Unknown task dependence type"); 3964 } 3965 auto FlagsLVal = CGF.EmitLValueForField( 3966 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 3967 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 3968 FlagsLVal); 3969 } 3970 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3971 CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()), 3972 CGF.VoidPtrTy); 3973 } 3974 3975 // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() 3976 // libcall. 
3977 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 3978 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 3979 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 3980 // list is not empty 3981 auto *ThreadID = getThreadID(CGF, Loc); 3982 auto *UpLoc = emitUpdateLocation(CGF, Loc); 3983 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 3984 llvm::Value *DepTaskArgs[7]; 3985 if (NumDependencies) { 3986 DepTaskArgs[0] = UpLoc; 3987 DepTaskArgs[1] = ThreadID; 3988 DepTaskArgs[2] = NewTask; 3989 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); 3990 DepTaskArgs[4] = DependenciesArray.getPointer(); 3991 DepTaskArgs[5] = CGF.Builder.getInt32(0); 3992 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3993 } 3994 auto &&ThenCodeGen = [this, Loc, &Data, TDBase, KmpTaskTQTyRD, 3995 NumDependencies, &TaskArgs, 3996 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 3997 if (!Data.Tied) { 3998 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3999 auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 4000 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 4001 } 4002 if (NumDependencies) { 4003 CGF.EmitRuntimeCall( 4004 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); 4005 } else { 4006 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 4007 TaskArgs); 4008 } 4009 // Check if parent region is untied and build return for untied task; 4010 if (auto *Region = 4011 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 4012 Region->emitUntiedSwitch(CGF); 4013 }; 4014 4015 llvm::Value *DepWaitTaskArgs[6]; 4016 if (NumDependencies) { 4017 DepWaitTaskArgs[0] = UpLoc; 4018 DepWaitTaskArgs[1] = ThreadID; 4019 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); 4020 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 4021 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 4022 DepWaitTaskArgs[5] = 
llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4023 } 4024 auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 4025 NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF, 4026 PrePostActionTy &) { 4027 auto &RT = CGF.CGM.getOpenMPRuntime(); 4028 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 4029 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 4030 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 4031 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 4032 // is specified. 4033 if (NumDependencies) 4034 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), 4035 DepWaitTaskArgs); 4036 // Call proxy_task_entry(gtid, new_task); 4037 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy]( 4038 CodeGenFunction &CGF, PrePostActionTy &Action) { 4039 Action.Enter(CGF); 4040 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 4041 CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); 4042 }; 4043 4044 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 4045 // kmp_task_t *new_task); 4046 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 4047 // kmp_task_t *new_task); 4048 RegionCodeGenTy RCG(CodeGen); 4049 CommonActionTy Action( 4050 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, 4051 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); 4052 RCG.setAction(Action); 4053 RCG(CGF); 4054 }; 4055 4056 if (IfCond) 4057 emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 4058 else { 4059 RegionCodeGenTy ThenRCG(ThenCodeGen); 4060 ThenRCG(CGF); 4061 } 4062 } 4063 4064 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 4065 const OMPLoopDirective &D, 4066 llvm::Value *TaskFunction, 4067 QualType SharedsTy, Address Shareds, 4068 const Expr *IfCond, 4069 const OMPTaskDataTy &Data) { 4070 if (!CGF.HaveInsertPoint()) 4071 return; 4072 TaskResultTy Result = 4073 
emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 4074 // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() 4075 // libcall. 4076 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 4077 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 4078 // sched, kmp_uint64 grainsize, void *task_dup); 4079 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4080 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 4081 llvm::Value *IfVal; 4082 if (IfCond) { 4083 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 4084 /*isSigned=*/true); 4085 } else 4086 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 4087 4088 LValue LBLVal = CGF.EmitLValueForField( 4089 Result.TDBase, 4090 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 4091 auto *LBVar = 4092 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 4093 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), 4094 /*IsInitializer=*/true); 4095 LValue UBLVal = CGF.EmitLValueForField( 4096 Result.TDBase, 4097 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 4098 auto *UBVar = 4099 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 4100 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), 4101 /*IsInitializer=*/true); 4102 LValue StLVal = CGF.EmitLValueForField( 4103 Result.TDBase, 4104 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 4105 auto *StVar = 4106 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 4107 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), 4108 /*IsInitializer=*/true); 4109 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 4110 llvm::Value *TaskArgs[] = { 4111 UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(), 4112 UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()), 
4113 llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0), 4114 llvm::ConstantInt::getSigned( 4115 CGF.IntTy, Data.Schedule.getPointer() 4116 ? Data.Schedule.getInt() ? NumTasks : Grainsize 4117 : NoSchedule), 4118 Data.Schedule.getPointer() 4119 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 4120 /*isSigned=*/false) 4121 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 4122 Result.TaskDupFn 4123 ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn, 4124 CGF.VoidPtrTy) 4125 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 4126 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); 4127 } 4128 4129 /// \brief Emit reduction operation for each element of array (required for 4130 /// array sections) LHS op = RHS. 4131 /// \param Type Type of array. 4132 /// \param LHSVar Variable on the left side of the reduction operation 4133 /// (references element of array in original variable). 4134 /// \param RHSVar Variable on the right side of the reduction operation 4135 /// (references element of array in original variable). 4136 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 4137 /// RHSVar. 4138 static void EmitOMPAggregateReduction( 4139 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 4140 const VarDecl *RHSVar, 4141 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 4142 const Expr *, const Expr *)> &RedOpGen, 4143 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 4144 const Expr *UpExpr = nullptr) { 4145 // Perform element-by-element initialization. 4146 QualType ElementTy; 4147 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 4148 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 4149 4150 // Drill down to the base element type on both arrays. 
4151 auto ArrayTy = Type->getAsArrayTypeUnsafe(); 4152 auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 4153 4154 auto RHSBegin = RHSAddr.getPointer(); 4155 auto LHSBegin = LHSAddr.getPointer(); 4156 // Cast from pointer to array type to pointer to single element. 4157 auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 4158 // The basic structure here is a while-do loop. 4159 auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 4160 auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 4161 auto IsEmpty = 4162 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 4163 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 4164 4165 // Enter the loop body, making that address the current address. 4166 auto EntryBB = CGF.Builder.GetInsertBlock(); 4167 CGF.EmitBlock(BodyBB); 4168 4169 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 4170 4171 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 4172 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 4173 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 4174 Address RHSElementCurrent = 4175 Address(RHSElementPHI, 4176 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 4177 4178 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 4179 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 4180 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 4181 Address LHSElementCurrent = 4182 Address(LHSElementPHI, 4183 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 4184 4185 // Emit copy. 4186 CodeGenFunction::OMPPrivateScope Scope(CGF); 4187 Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; }); 4188 Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; }); 4189 Scope.Privatize(); 4190 RedOpGen(CGF, XExpr, EExpr, UpExpr); 4191 Scope.ForceCleanup(); 4192 4193 // Shift the address forward by one element. 
4194 auto LHSElementNext = CGF.Builder.CreateConstGEP1_32( 4195 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 4196 auto RHSElementNext = CGF.Builder.CreateConstGEP1_32( 4197 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 4198 // Check whether we've reached the end. 4199 auto Done = 4200 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 4201 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 4202 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 4203 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 4204 4205 // Done. 4206 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 4207 } 4208 4209 /// Emit reduction combiner. If the combiner is a simple expression emit it as 4210 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 4211 /// UDR combiner function. 4212 static void emitReductionCombiner(CodeGenFunction &CGF, 4213 const Expr *ReductionOp) { 4214 if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) 4215 if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 4216 if (auto *DRE = 4217 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 4218 if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 4219 std::pair<llvm::Function *, llvm::Function *> Reduction = 4220 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 4221 RValue Func = RValue::get(Reduction.first); 4222 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 4223 CGF.EmitIgnoredExpr(ReductionOp); 4224 return; 4225 } 4226 CGF.EmitIgnoredExpr(ReductionOp); 4227 } 4228 4229 static llvm::Value *emitReductionFunction(CodeGenModule &CGM, 4230 llvm::Type *ArgsType, 4231 ArrayRef<const Expr *> Privates, 4232 ArrayRef<const Expr *> LHSExprs, 4233 ArrayRef<const Expr *> RHSExprs, 4234 ArrayRef<const Expr *> ReductionOps) { 4235 auto &C = CGM.getContext(); 4236 4237 // void reduction_func(void *LHSArg, void *RHSArg); 4238 FunctionArgList Args; 4239 ImplicitParamDecl LHSArg(C, 
/*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 4240 C.VoidPtrTy); 4241 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 4242 C.VoidPtrTy); 4243 Args.push_back(&LHSArg); 4244 Args.push_back(&RHSArg); 4245 auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4246 auto *Fn = llvm::Function::Create( 4247 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 4248 ".omp.reduction.reduction_func", &CGM.getModule()); 4249 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); 4250 CodeGenFunction CGF(CGM); 4251 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 4252 4253 // Dst = (void*[n])(LHSArg); 4254 // Src = (void*[n])(RHSArg); 4255 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4256 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 4257 ArgsType), CGF.getPointerAlign()); 4258 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4259 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 4260 ArgsType), CGF.getPointerAlign()); 4261 4262 // ... 4263 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 4264 // ... 4265 CodeGenFunction::OMPPrivateScope Scope(CGF); 4266 auto IPriv = Privates.begin(); 4267 unsigned Idx = 0; 4268 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 4269 auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 4270 Scope.addPrivate(RHSVar, [&]() -> Address { 4271 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 4272 }); 4273 auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 4274 Scope.addPrivate(LHSVar, [&]() -> Address { 4275 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 4276 }); 4277 QualType PrivTy = (*IPriv)->getType(); 4278 if (PrivTy->isVariablyModifiedType()) { 4279 // Get array size and emit VLA type. 
4280 ++Idx; 4281 Address Elem = 4282 CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize()); 4283 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 4284 auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy); 4285 auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 4286 CodeGenFunction::OpaqueValueMapping OpaqueMap( 4287 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 4288 CGF.EmitVariablyModifiedType(PrivTy); 4289 } 4290 } 4291 Scope.Privatize(); 4292 IPriv = Privates.begin(); 4293 auto ILHS = LHSExprs.begin(); 4294 auto IRHS = RHSExprs.begin(); 4295 for (auto *E : ReductionOps) { 4296 if ((*IPriv)->getType()->isArrayType()) { 4297 // Emit reduction for array section. 4298 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 4299 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 4300 EmitOMPAggregateReduction( 4301 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 4302 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 4303 emitReductionCombiner(CGF, E); 4304 }); 4305 } else 4306 // Emit reduction for array subscript or single variable. 4307 emitReductionCombiner(CGF, E); 4308 ++IPriv; 4309 ++ILHS; 4310 ++IRHS; 4311 } 4312 Scope.ForceCleanup(); 4313 CGF.FinishFunction(); 4314 return Fn; 4315 } 4316 4317 static void emitSingleReductionCombiner(CodeGenFunction &CGF, 4318 const Expr *ReductionOp, 4319 const Expr *PrivateRef, 4320 const DeclRefExpr *LHS, 4321 const DeclRefExpr *RHS) { 4322 if (PrivateRef->getType()->isArrayType()) { 4323 // Emit reduction for array section. 4324 auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 4325 auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 4326 EmitOMPAggregateReduction( 4327 CGF, PrivateRef->getType(), LHSVar, RHSVar, 4328 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 4329 emitReductionCombiner(CGF, ReductionOp); 4330 }); 4331 } else 4332 // Emit reduction for array subscript or single variable. 
4333 emitReductionCombiner(CGF, ReductionOp); 4334 } 4335 4336 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 4337 ArrayRef<const Expr *> Privates, 4338 ArrayRef<const Expr *> LHSExprs, 4339 ArrayRef<const Expr *> RHSExprs, 4340 ArrayRef<const Expr *> ReductionOps, 4341 bool WithNowait, bool SimpleReduction) { 4342 if (!CGF.HaveInsertPoint()) 4343 return; 4344 // Next code should be emitted for reduction: 4345 // 4346 // static kmp_critical_name lock = { 0 }; 4347 // 4348 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 4349 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 4350 // ... 4351 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 4352 // *(Type<n>-1*)rhs[<n>-1]); 4353 // } 4354 // 4355 // ... 4356 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 4357 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 4358 // RedList, reduce_func, &<lock>)) { 4359 // case 1: 4360 // ... 4361 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 4362 // ... 4363 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 4364 // break; 4365 // case 2: 4366 // ... 4367 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 4368 // ... 4369 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 4370 // break; 4371 // default:; 4372 // } 4373 // 4374 // if SimpleReduction is true, only the next code is generated: 4375 // ... 4376 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 4377 // ... 
4378 4379 auto &C = CGM.getContext(); 4380 4381 if (SimpleReduction) { 4382 CodeGenFunction::RunCleanupsScope Scope(CGF); 4383 auto IPriv = Privates.begin(); 4384 auto ILHS = LHSExprs.begin(); 4385 auto IRHS = RHSExprs.begin(); 4386 for (auto *E : ReductionOps) { 4387 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 4388 cast<DeclRefExpr>(*IRHS)); 4389 ++IPriv; 4390 ++ILHS; 4391 ++IRHS; 4392 } 4393 return; 4394 } 4395 4396 // 1. Build a list of reduction variables. 4397 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 4398 auto Size = RHSExprs.size(); 4399 for (auto *E : Privates) { 4400 if (E->getType()->isVariablyModifiedType()) 4401 // Reserve place for array size. 4402 ++Size; 4403 } 4404 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 4405 QualType ReductionArrayTy = 4406 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 4407 /*IndexTypeQuals=*/0); 4408 Address ReductionList = 4409 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 4410 auto IPriv = Privates.begin(); 4411 unsigned Idx = 0; 4412 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 4413 Address Elem = 4414 CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize()); 4415 CGF.Builder.CreateStore( 4416 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4417 CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), 4418 Elem); 4419 if ((*IPriv)->getType()->isVariablyModifiedType()) { 4420 // Store array size. 4421 ++Idx; 4422 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, 4423 CGF.getPointerSize()); 4424 llvm::Value *Size = CGF.Builder.CreateIntCast( 4425 CGF.getVLASize( 4426 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 4427 .first, 4428 CGF.SizeTy, /*isSigned=*/false); 4429 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 4430 Elem); 4431 } 4432 } 4433 4434 // 2. Emit reduce_func(). 
4435 auto *ReductionFn = emitReductionFunction( 4436 CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 4437 LHSExprs, RHSExprs, ReductionOps); 4438 4439 // 3. Create static kmp_critical_name lock = { 0 }; 4440 auto *Lock = getCriticalRegionLock(".reduction"); 4441 4442 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 4443 // RedList, reduce_func, &<lock>); 4444 auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 4445 auto *ThreadId = getThreadID(CGF, Loc); 4446 auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 4447 auto *RL = 4448 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(), 4449 CGF.VoidPtrTy); 4450 llvm::Value *Args[] = { 4451 IdentTLoc, // ident_t *<loc> 4452 ThreadId, // i32 <gtid> 4453 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 4454 ReductionArrayTySize, // size_type sizeof(RedList) 4455 RL, // void *RedList 4456 ReductionFn, // void (*) (void *, void *) <reduce_func> 4457 Lock // kmp_critical_name *&<lock> 4458 }; 4459 auto Res = CGF.EmitRuntimeCall( 4460 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 4461 : OMPRTL__kmpc_reduce), 4462 Args); 4463 4464 // 5. Build switch(res) 4465 auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 4466 auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 4467 4468 // 6. Build case 1: 4469 // ... 4470 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 4471 // ... 
4472 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 4473 // break; 4474 auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 4475 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 4476 CGF.EmitBlock(Case1BB); 4477 4478 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 4479 llvm::Value *EndArgs[] = { 4480 IdentTLoc, // ident_t *<loc> 4481 ThreadId, // i32 <gtid> 4482 Lock // kmp_critical_name *&<lock> 4483 }; 4484 auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps]( 4485 CodeGenFunction &CGF, PrePostActionTy &Action) { 4486 auto IPriv = Privates.begin(); 4487 auto ILHS = LHSExprs.begin(); 4488 auto IRHS = RHSExprs.begin(); 4489 for (auto *E : ReductionOps) { 4490 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 4491 cast<DeclRefExpr>(*IRHS)); 4492 ++IPriv; 4493 ++ILHS; 4494 ++IRHS; 4495 } 4496 }; 4497 RegionCodeGenTy RCG(CodeGen); 4498 CommonActionTy Action( 4499 nullptr, llvm::None, 4500 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait 4501 : OMPRTL__kmpc_end_reduce), 4502 EndArgs); 4503 RCG.setAction(Action); 4504 RCG(CGF); 4505 4506 CGF.EmitBranch(DefaultBB); 4507 4508 // 7. Build case 2: 4509 // ... 4510 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 4511 // ... 
4512 // break; 4513 auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 4514 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 4515 CGF.EmitBlock(Case2BB); 4516 4517 auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps]( 4518 CodeGenFunction &CGF, PrePostActionTy &Action) { 4519 auto ILHS = LHSExprs.begin(); 4520 auto IRHS = RHSExprs.begin(); 4521 auto IPriv = Privates.begin(); 4522 for (auto *E : ReductionOps) { 4523 const Expr *XExpr = nullptr; 4524 const Expr *EExpr = nullptr; 4525 const Expr *UpExpr = nullptr; 4526 BinaryOperatorKind BO = BO_Comma; 4527 if (auto *BO = dyn_cast<BinaryOperator>(E)) { 4528 if (BO->getOpcode() == BO_Assign) { 4529 XExpr = BO->getLHS(); 4530 UpExpr = BO->getRHS(); 4531 } 4532 } 4533 // Try to emit update expression as a simple atomic. 4534 auto *RHSExpr = UpExpr; 4535 if (RHSExpr) { 4536 // Analyze RHS part of the whole expression. 4537 if (auto *ACO = dyn_cast<AbstractConditionalOperator>( 4538 RHSExpr->IgnoreParenImpCasts())) { 4539 // If this is a conditional operator, analyze its condition for 4540 // min/max reduction operator. 
4541 RHSExpr = ACO->getCond(); 4542 } 4543 if (auto *BORHS = 4544 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 4545 EExpr = BORHS->getRHS(); 4546 BO = BORHS->getOpcode(); 4547 } 4548 } 4549 if (XExpr) { 4550 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 4551 auto &&AtomicRedGen = [BO, VD, IPriv, 4552 Loc](CodeGenFunction &CGF, const Expr *XExpr, 4553 const Expr *EExpr, const Expr *UpExpr) { 4554 LValue X = CGF.EmitLValue(XExpr); 4555 RValue E; 4556 if (EExpr) 4557 E = CGF.EmitAnyExpr(EExpr); 4558 CGF.EmitOMPAtomicSimpleUpdateExpr( 4559 X, E, BO, /*IsXLHSInRHSPart=*/true, 4560 llvm::AtomicOrdering::Monotonic, Loc, 4561 [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) { 4562 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4563 PrivateScope.addPrivate( 4564 VD, [&CGF, VD, XRValue, Loc]() -> Address { 4565 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 4566 CGF.emitOMPSimpleStore( 4567 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 4568 VD->getType().getNonReferenceType(), Loc); 4569 return LHSTemp; 4570 }); 4571 (void)PrivateScope.Privatize(); 4572 return CGF.EmitAnyExpr(UpExpr); 4573 }); 4574 }; 4575 if ((*IPriv)->getType()->isArrayType()) { 4576 // Emit atomic reduction for array section. 4577 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 4578 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 4579 AtomicRedGen, XExpr, EExpr, UpExpr); 4580 } else 4581 // Emit atomic reduction for array subscript or single variable. 4582 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 4583 } else { 4584 // Emit as a critical region. 
4585 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 4586 const Expr *, const Expr *) { 4587 auto &RT = CGF.CGM.getOpenMPRuntime(); 4588 RT.emitCriticalRegion( 4589 CGF, ".atomic_reduction", 4590 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 4591 Action.Enter(CGF); 4592 emitReductionCombiner(CGF, E); 4593 }, 4594 Loc); 4595 }; 4596 if ((*IPriv)->getType()->isArrayType()) { 4597 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 4598 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 4599 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 4600 CritRedGen); 4601 } else 4602 CritRedGen(CGF, nullptr, nullptr, nullptr); 4603 } 4604 ++ILHS; 4605 ++IRHS; 4606 ++IPriv; 4607 } 4608 }; 4609 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 4610 if (!WithNowait) { 4611 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 4612 llvm::Value *EndArgs[] = { 4613 IdentTLoc, // ident_t *<loc> 4614 ThreadId, // i32 <gtid> 4615 Lock // kmp_critical_name *&<lock> 4616 }; 4617 CommonActionTy Action(nullptr, llvm::None, 4618 createRuntimeFunction(OMPRTL__kmpc_end_reduce), 4619 EndArgs); 4620 AtomicRCG.setAction(Action); 4621 AtomicRCG(CGF); 4622 } else 4623 AtomicRCG(CGF); 4624 4625 CGF.EmitBranch(DefaultBB); 4626 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 4627 } 4628 4629 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 4630 SourceLocation Loc) { 4631 if (!CGF.HaveInsertPoint()) 4632 return; 4633 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 4634 // global_tid); 4635 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 4636 // Ignore return result until untied tasks are supported. 
4637 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 4638 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 4639 Region->emitUntiedSwitch(CGF); 4640 } 4641 4642 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 4643 OpenMPDirectiveKind InnerKind, 4644 const RegionCodeGenTy &CodeGen, 4645 bool HasCancel) { 4646 if (!CGF.HaveInsertPoint()) 4647 return; 4648 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 4649 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 4650 } 4651 4652 namespace { 4653 enum RTCancelKind { 4654 CancelNoreq = 0, 4655 CancelParallel = 1, 4656 CancelLoop = 2, 4657 CancelSections = 3, 4658 CancelTaskgroup = 4 4659 }; 4660 } // anonymous namespace 4661 4662 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 4663 RTCancelKind CancelKind = CancelNoreq; 4664 if (CancelRegion == OMPD_parallel) 4665 CancelKind = CancelParallel; 4666 else if (CancelRegion == OMPD_for) 4667 CancelKind = CancelLoop; 4668 else if (CancelRegion == OMPD_sections) 4669 CancelKind = CancelSections; 4670 else { 4671 assert(CancelRegion == OMPD_taskgroup); 4672 CancelKind = CancelTaskgroup; 4673 } 4674 return CancelKind; 4675 } 4676 4677 void CGOpenMPRuntime::emitCancellationPointCall( 4678 CodeGenFunction &CGF, SourceLocation Loc, 4679 OpenMPDirectiveKind CancelRegion) { 4680 if (!CGF.HaveInsertPoint()) 4681 return; 4682 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 4683 // global_tid, kmp_int32 cncl_kind); 4684 if (auto *OMPRegionInfo = 4685 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 4686 if (OMPRegionInfo->hasCancel()) { 4687 llvm::Value *Args[] = { 4688 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 4689 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 4690 // Ignore return result until untied tasks are supported. 
4691 auto *Result = CGF.EmitRuntimeCall( 4692 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 4693 // if (__kmpc_cancellationpoint()) { 4694 // __kmpc_cancel_barrier(); 4695 // exit from construct; 4696 // } 4697 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 4698 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 4699 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 4700 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 4701 CGF.EmitBlock(ExitBB); 4702 // __kmpc_cancel_barrier(); 4703 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 4704 // exit from construct; 4705 auto CancelDest = 4706 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 4707 CGF.EmitBranchThroughCleanup(CancelDest); 4708 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 4709 } 4710 } 4711 } 4712 4713 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 4714 const Expr *IfCond, 4715 OpenMPDirectiveKind CancelRegion) { 4716 if (!CGF.HaveInsertPoint()) 4717 return; 4718 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 4719 // kmp_int32 cncl_kind); 4720 if (auto *OMPRegionInfo = 4721 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 4722 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 4723 PrePostActionTy &) { 4724 auto &RT = CGF.CGM.getOpenMPRuntime(); 4725 llvm::Value *Args[] = { 4726 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 4727 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 4728 // Ignore return result until untied tasks are supported. 
4729 auto *Result = CGF.EmitRuntimeCall( 4730 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 4731 // if (__kmpc_cancel()) { 4732 // __kmpc_cancel_barrier(); 4733 // exit from construct; 4734 // } 4735 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 4736 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 4737 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 4738 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 4739 CGF.EmitBlock(ExitBB); 4740 // __kmpc_cancel_barrier(); 4741 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 4742 // exit from construct; 4743 auto CancelDest = 4744 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 4745 CGF.EmitBranchThroughCleanup(CancelDest); 4746 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 4747 }; 4748 if (IfCond) 4749 emitOMPIfClause(CGF, IfCond, ThenGen, 4750 [](CodeGenFunction &, PrePostActionTy &) {}); 4751 else { 4752 RegionCodeGenTy ThenRCG(ThenGen); 4753 ThenRCG(CGF); 4754 } 4755 } 4756 } 4757 4758 /// \brief Obtain information that uniquely identifies a target entry. This 4759 /// consists of the file and device IDs as well as line number associated with 4760 /// the relevant entry source location. 
4761 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 4762 unsigned &DeviceID, unsigned &FileID, 4763 unsigned &LineNum) { 4764 4765 auto &SM = C.getSourceManager(); 4766 4767 // The loc should be always valid and have a file ID (the user cannot use 4768 // #pragma directives in macros) 4769 4770 assert(Loc.isValid() && "Source location is expected to be always valid."); 4771 assert(Loc.isFileID() && "Source location is expected to refer to a file."); 4772 4773 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 4774 assert(PLoc.isValid() && "Source location is expected to be always valid."); 4775 4776 llvm::sys::fs::UniqueID ID; 4777 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 4778 llvm_unreachable("Source file with target region no longer exists!"); 4779 4780 DeviceID = ID.getDevice(); 4781 FileID = ID.getFile(); 4782 LineNum = PLoc.getLine(); 4783 } 4784 4785 void CGOpenMPRuntime::emitTargetOutlinedFunction( 4786 const OMPExecutableDirective &D, StringRef ParentName, 4787 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 4788 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 4789 assert(!ParentName.empty() && "Invalid target region parent name!"); 4790 4791 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 4792 IsOffloadEntry, CodeGen); 4793 } 4794 4795 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 4796 const OMPExecutableDirective &D, StringRef ParentName, 4797 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 4798 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 4799 // Create a unique name for the entry function using the source location 4800 // information of the current target region. 
The name will be something like: 4801 // 4802 // __omp_offloading_DD_FFFF_PP_lBB 4803 // 4804 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 4805 // mangled name of the function that encloses the target region and BB is the 4806 // line number of the target region. 4807 4808 unsigned DeviceID; 4809 unsigned FileID; 4810 unsigned Line; 4811 getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID, 4812 Line); 4813 SmallString<64> EntryFnName; 4814 { 4815 llvm::raw_svector_ostream OS(EntryFnName); 4816 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 4817 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 4818 } 4819 4820 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 4821 4822 CodeGenFunction CGF(CGM, true); 4823 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 4824 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 4825 4826 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); 4827 4828 // If this target outline function is not an offload entry, we don't need to 4829 // register it. 4830 if (!IsOffloadEntry) 4831 return; 4832 4833 // The target region ID is used by the runtime library to identify the current 4834 // target region, so it only has to be unique and not necessarily point to 4835 // anything. It could be the pointer to the outlined function that implements 4836 // the target region, but we aren't using that so that the compiler doesn't 4837 // need to keep that, and could therefore inline the host function if proven 4838 // worthwhile during optimization. In the other hand, if emitting code for the 4839 // device, the ID has to be the function address so that it can retrieved from 4840 // the offloading entry and launched by the runtime library. We also mark the 4841 // outlined function to have external linkage in case we are emitting code for 4842 // the device, because these functions will be entry points to the device. 
4843 4844 if (CGM.getLangOpts().OpenMPIsDevice) { 4845 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 4846 OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage); 4847 } else 4848 OutlinedFnID = new llvm::GlobalVariable( 4849 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 4850 llvm::GlobalValue::PrivateLinkage, 4851 llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id"); 4852 4853 // Register the information for the entry associated with this target region. 4854 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 4855 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID); 4856 } 4857 4858 /// discard all CompoundStmts intervening between two constructs 4859 static const Stmt *ignoreCompoundStmts(const Stmt *Body) { 4860 while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body)) 4861 Body = CS->body_front(); 4862 4863 return Body; 4864 } 4865 4866 /// \brief Emit the num_teams clause of an enclosed teams directive at the 4867 /// target region scope. If there is no teams directive associated with the 4868 /// target directive, or if there is no num_teams clause associated with the 4869 /// enclosed teams directive, return nullptr. 4870 static llvm::Value * 4871 emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, 4872 CodeGenFunction &CGF, 4873 const OMPExecutableDirective &D) { 4874 4875 assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " 4876 "teams directive expected to be " 4877 "emitted only for the host!"); 4878 4879 // FIXME: For the moment we do not support combined directives with target and 4880 // teams, so we do not expect to get any num_teams clause in the provided 4881 // directive. Once we support that, this assertion can be replaced by the 4882 // actual emission of the clause expression. 
4883 assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr && 4884 "Not expecting clause in directive."); 4885 4886 // If the current target region has a teams region enclosed, we need to get 4887 // the number of teams to pass to the runtime function call. This is done 4888 // by generating the expression in a inlined region. This is required because 4889 // the expression is captured in the enclosing target environment when the 4890 // teams directive is not combined with target. 4891 4892 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 4893 4894 // FIXME: Accommodate other combined directives with teams when they become 4895 // available. 4896 if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>( 4897 ignoreCompoundStmts(CS.getCapturedStmt()))) { 4898 if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { 4899 CGOpenMPInnerExprInfo CGInfo(CGF, CS); 4900 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 4901 llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); 4902 return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty, 4903 /*IsSigned=*/true); 4904 } 4905 4906 // If we have an enclosed teams directive but no num_teams clause we use 4907 // the default value 0. 4908 return CGF.Builder.getInt32(0); 4909 } 4910 4911 // No teams associated with the directive. 4912 return nullptr; 4913 } 4914 4915 /// \brief Emit the thread_limit clause of an enclosed teams directive at the 4916 /// target region scope. If there is no teams directive associated with the 4917 /// target directive, or if there is no thread_limit clause associated with the 4918 /// enclosed teams directive, return nullptr. 
4919 static llvm::Value * 4920 emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, 4921 CodeGenFunction &CGF, 4922 const OMPExecutableDirective &D) { 4923 4924 assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " 4925 "teams directive expected to be " 4926 "emitted only for the host!"); 4927 4928 // FIXME: For the moment we do not support combined directives with target and 4929 // teams, so we do not expect to get any thread_limit clause in the provided 4930 // directive. Once we support that, this assertion can be replaced by the 4931 // actual emission of the clause expression. 4932 assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr && 4933 "Not expecting clause in directive."); 4934 4935 // If the current target region has a teams region enclosed, we need to get 4936 // the thread limit to pass to the runtime function call. This is done 4937 // by generating the expression in a inlined region. This is required because 4938 // the expression is captured in the enclosing target environment when the 4939 // teams directive is not combined with target. 4940 4941 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 4942 4943 // FIXME: Accommodate other combined directives with teams when they become 4944 // available. 4945 if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>( 4946 ignoreCompoundStmts(CS.getCapturedStmt()))) { 4947 if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { 4948 CGOpenMPInnerExprInfo CGInfo(CGF, CS); 4949 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 4950 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); 4951 return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, 4952 /*IsSigned=*/true); 4953 } 4954 4955 // If we have an enclosed teams directive but no thread_limit clause we use 4956 // the default value 0. 4957 return CGF.Builder.getInt32(0); 4958 } 4959 4960 // No teams associated with the directive. 
4961 return nullptr; 4962 } 4963 4964 namespace { 4965 // \brief Utility to handle information from clauses associated with a given 4966 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 4967 // It provides a convenient interface to obtain the information and generate 4968 // code for that information. 4969 class MappableExprsHandler { 4970 public: 4971 /// \brief Values for bit flags used to specify the mapping type for 4972 /// offloading. 4973 enum OpenMPOffloadMappingFlags { 4974 /// \brief Allocate memory on the device and move data from host to device. 4975 OMP_MAP_TO = 0x01, 4976 /// \brief Allocate memory on the device and move data from device to host. 4977 OMP_MAP_FROM = 0x02, 4978 /// \brief Always perform the requested mapping action on the element, even 4979 /// if it was already mapped before. 4980 OMP_MAP_ALWAYS = 0x04, 4981 /// \brief Delete the element from the device environment, ignoring the 4982 /// current reference count associated with the element. 4983 OMP_MAP_DELETE = 0x08, 4984 /// \brief The element being mapped is a pointer, therefore the pointee 4985 /// should be mapped as well. 4986 OMP_MAP_IS_PTR = 0x10, 4987 /// \brief This flags signals that an argument is the first one relating to 4988 /// a map/private clause expression. For some cases a single 4989 /// map/privatization results in multiple arguments passed to the runtime 4990 /// library. 4991 OMP_MAP_FIRST_REF = 0x20, 4992 /// \brief Signal that the runtime library has to return the device pointer 4993 /// in the current position for the data being mapped. 4994 OMP_MAP_RETURN_PTR = 0x40, 4995 /// \brief This flag signals that the reference being passed is a pointer to 4996 /// private data. 4997 OMP_MAP_PRIVATE_PTR = 0x80, 4998 /// \brief Pass the element to the device by value. 4999 OMP_MAP_PRIVATE_VAL = 0x100, 5000 }; 5001 5002 /// Class that associates information with a base pointer to be passed to the 5003 /// runtime library. 
5004 class BasePointerInfo { 5005 /// The base pointer. 5006 llvm::Value *Ptr = nullptr; 5007 /// The base declaration that refers to this device pointer, or null if 5008 /// there is none. 5009 const ValueDecl *DevPtrDecl = nullptr; 5010 5011 public: 5012 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 5013 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 5014 llvm::Value *operator*() const { return Ptr; } 5015 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 5016 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 5017 }; 5018 5019 typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy; 5020 typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy; 5021 typedef SmallVector<unsigned, 16> MapFlagsArrayTy; 5022 5023 private: 5024 /// \brief Directive from where the map clauses were extracted. 5025 const OMPExecutableDirective &CurDir; 5026 5027 /// \brief Function the directive is being generated for. 5028 CodeGenFunction &CGF; 5029 5030 /// \brief Set of all first private variables in the current directive. 5031 llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls; 5032 5033 /// Map between device pointer declarations and their expression components. 5034 /// The key value for declarations in 'this' is null. 5035 llvm::DenseMap< 5036 const ValueDecl *, 5037 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 5038 DevPointersMap; 5039 5040 llvm::Value *getExprTypeSize(const Expr *E) const { 5041 auto ExprTy = E->getType().getCanonicalType(); 5042 5043 // Reference types are ignored for mapping purposes. 5044 if (auto *RefTy = ExprTy->getAs<ReferenceType>()) 5045 ExprTy = RefTy->getPointeeType().getCanonicalType(); 5046 5047 // Given that an array section is considered a built-in type, we need to 5048 // do the calculation based on the length of the section instead of relying 5049 // on CGF.getTypeSize(E->getType()). 
5050 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 5051 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 5052 OAE->getBase()->IgnoreParenImpCasts()) 5053 .getCanonicalType(); 5054 5055 // If there is no length associated with the expression, that means we 5056 // are using the whole length of the base. 5057 if (!OAE->getLength() && OAE->getColonLoc().isValid()) 5058 return CGF.getTypeSize(BaseTy); 5059 5060 llvm::Value *ElemSize; 5061 if (auto *PTy = BaseTy->getAs<PointerType>()) 5062 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 5063 else { 5064 auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 5065 assert(ATy && "Expecting array type if not a pointer type."); 5066 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 5067 } 5068 5069 // If we don't have a length at this point, that is because we have an 5070 // array section with a single element. 5071 if (!OAE->getLength()) 5072 return ElemSize; 5073 5074 auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); 5075 LengthVal = 5076 CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); 5077 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 5078 } 5079 return CGF.getTypeSize(ExprTy); 5080 } 5081 5082 /// \brief Return the corresponding bits for a given map clause modifier. Add 5083 /// a flag marking the map as a pointer if requested. Add a flag marking the 5084 /// map as the first one of a series of maps that relate to the same map 5085 /// expression. 5086 unsigned getMapTypeBits(OpenMPMapClauseKind MapType, 5087 OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag, 5088 bool AddIsFirstFlag) const { 5089 unsigned Bits = 0u; 5090 switch (MapType) { 5091 case OMPC_MAP_alloc: 5092 case OMPC_MAP_release: 5093 // alloc and release is the default behavior in the runtime library, i.e. 5094 // if we don't pass any bits alloc/release that is what the runtime is 5095 // going to do. 
Therefore, we don't need to signal anything for these two 5096 // type modifiers. 5097 break; 5098 case OMPC_MAP_to: 5099 Bits = OMP_MAP_TO; 5100 break; 5101 case OMPC_MAP_from: 5102 Bits = OMP_MAP_FROM; 5103 break; 5104 case OMPC_MAP_tofrom: 5105 Bits = OMP_MAP_TO | OMP_MAP_FROM; 5106 break; 5107 case OMPC_MAP_delete: 5108 Bits = OMP_MAP_DELETE; 5109 break; 5110 default: 5111 llvm_unreachable("Unexpected map type!"); 5112 break; 5113 } 5114 if (AddPtrFlag) 5115 Bits |= OMP_MAP_IS_PTR; 5116 if (AddIsFirstFlag) 5117 Bits |= OMP_MAP_FIRST_REF; 5118 if (MapTypeModifier == OMPC_MAP_always) 5119 Bits |= OMP_MAP_ALWAYS; 5120 return Bits; 5121 } 5122 5123 /// \brief Return true if the provided expression is a final array section. A 5124 /// final array section, is one whose length can't be proved to be one. 5125 bool isFinalArraySectionExpression(const Expr *E) const { 5126 auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 5127 5128 // It is not an array section and therefore not a unity-size one. 5129 if (!OASE) 5130 return false; 5131 5132 // An array section with no colon always refer to a single element. 5133 if (OASE->getColonLoc().isInvalid()) 5134 return false; 5135 5136 auto *Length = OASE->getLength(); 5137 5138 // If we don't have a length we have to check if the array has size 1 5139 // for this dimension. Also, we should always expect a length if the 5140 // base type is pointer. 5141 if (!Length) { 5142 auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 5143 OASE->getBase()->IgnoreParenImpCasts()) 5144 .getCanonicalType(); 5145 if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 5146 return ATy->getSize().getSExtValue() != 1; 5147 // If we don't have a constant dimension length, we have to consider 5148 // the current section as having any size, so it is not necessarily 5149 // unitary. If it happen to be unity size, that's user fault. 5150 return true; 5151 } 5152 5153 // Check if the length evaluates to 1. 
5154 llvm::APSInt ConstLength; 5155 if (!Length->EvaluateAsInt(ConstLength, CGF.getContext())) 5156 return true; // Can have more that size 1. 5157 5158 return ConstLength.getSExtValue() != 1; 5159 } 5160 5161 /// \brief Generate the base pointers, section pointers, sizes and map type 5162 /// bits for the provided map type, map modifier, and expression components. 5163 /// \a IsFirstComponent should be set to true if the provided set of 5164 /// components is the first associated with a capture. 5165 void generateInfoForComponentList( 5166 OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, 5167 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 5168 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 5169 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 5170 bool IsFirstComponentList) const { 5171 5172 // The following summarizes what has to be generated for each map and the 5173 // types bellow. The generated information is expressed in this order: 5174 // base pointer, section pointer, size, flags 5175 // (to add to the ones that come from the map type and modifier). 
5176 // 5177 // double d; 5178 // int i[100]; 5179 // float *p; 5180 // 5181 // struct S1 { 5182 // int i; 5183 // float f[50]; 5184 // } 5185 // struct S2 { 5186 // int i; 5187 // float f[50]; 5188 // S1 s; 5189 // double *p; 5190 // struct S2 *ps; 5191 // } 5192 // S2 s; 5193 // S2 *ps; 5194 // 5195 // map(d) 5196 // &d, &d, sizeof(double), noflags 5197 // 5198 // map(i) 5199 // &i, &i, 100*sizeof(int), noflags 5200 // 5201 // map(i[1:23]) 5202 // &i(=&i[0]), &i[1], 23*sizeof(int), noflags 5203 // 5204 // map(p) 5205 // &p, &p, sizeof(float*), noflags 5206 // 5207 // map(p[1:24]) 5208 // p, &p[1], 24*sizeof(float), noflags 5209 // 5210 // map(s) 5211 // &s, &s, sizeof(S2), noflags 5212 // 5213 // map(s.i) 5214 // &s, &(s.i), sizeof(int), noflags 5215 // 5216 // map(s.s.f) 5217 // &s, &(s.i.f), 50*sizeof(int), noflags 5218 // 5219 // map(s.p) 5220 // &s, &(s.p), sizeof(double*), noflags 5221 // 5222 // map(s.p[:22], s.a s.b) 5223 // &s, &(s.p), sizeof(double*), noflags 5224 // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag 5225 // 5226 // map(s.ps) 5227 // &s, &(s.ps), sizeof(S2*), noflags 5228 // 5229 // map(s.ps->s.i) 5230 // &s, &(s.ps), sizeof(S2*), noflags 5231 // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag 5232 // 5233 // map(s.ps->ps) 5234 // &s, &(s.ps), sizeof(S2*), noflags 5235 // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag 5236 // 5237 // map(s.ps->ps->ps) 5238 // &s, &(s.ps), sizeof(S2*), noflags 5239 // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag 5240 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 5241 // 5242 // map(s.ps->ps->s.f[:22]) 5243 // &s, &(s.ps), sizeof(S2*), noflags 5244 // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag 5245 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag 5246 // 5247 // map(ps) 5248 // &ps, &ps, sizeof(S2*), noflags 5249 // 5250 // map(ps->i) 5251 // ps, &(ps->i), sizeof(int), noflags 5252 // 5253 // 
map(ps->s.f) 5254 // ps, &(ps->s.f[0]), 50*sizeof(float), noflags 5255 // 5256 // map(ps->p) 5257 // ps, &(ps->p), sizeof(double*), noflags 5258 // 5259 // map(ps->p[:22]) 5260 // ps, &(ps->p), sizeof(double*), noflags 5261 // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag 5262 // 5263 // map(ps->ps) 5264 // ps, &(ps->ps), sizeof(S2*), noflags 5265 // 5266 // map(ps->ps->s.i) 5267 // ps, &(ps->ps), sizeof(S2*), noflags 5268 // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag 5269 // 5270 // map(ps->ps->ps) 5271 // ps, &(ps->ps), sizeof(S2*), noflags 5272 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 5273 // 5274 // map(ps->ps->ps->ps) 5275 // ps, &(ps->ps), sizeof(S2*), noflags 5276 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 5277 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 5278 // 5279 // map(ps->ps->ps->s.f[:22]) 5280 // ps, &(ps->ps), sizeof(S2*), noflags 5281 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 5282 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + 5283 // extra_flag 5284 5285 // Track if the map information being generated is the first for a capture. 5286 bool IsCaptureFirstInfo = IsFirstComponentList; 5287 5288 // Scan the components from the base to the complete expression. 5289 auto CI = Components.rbegin(); 5290 auto CE = Components.rend(); 5291 auto I = CI; 5292 5293 // Track if the map information being generated is the first for a list of 5294 // components. 5295 bool IsExpressionFirstInfo = true; 5296 llvm::Value *BP = nullptr; 5297 5298 if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) { 5299 // The base is the 'this' pointer. The content of the pointer is going 5300 // to be the base of the field being mapped. 5301 BP = CGF.EmitScalarExpr(ME->getBase()); 5302 } else { 5303 // The base is the reference to the variable. 5304 // BP = &Var. 
5305 BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression())) 5306 .getPointer(); 5307 5308 // If the variable is a pointer and is being dereferenced (i.e. is not 5309 // the last component), the base has to be the pointer itself, not its 5310 // reference. References are ignored for mapping purposes. 5311 QualType Ty = 5312 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 5313 if (Ty->isAnyPointerType() && std::next(I) != CE) { 5314 auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty); 5315 BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(), 5316 Ty->castAs<PointerType>()) 5317 .getPointer(); 5318 5319 // We do not need to generate individual map information for the 5320 // pointer, it can be associated with the combined storage. 5321 ++I; 5322 } 5323 } 5324 5325 for (; I != CE; ++I) { 5326 auto Next = std::next(I); 5327 5328 // We need to generate the addresses and sizes if this is the last 5329 // component, if the component is a pointer or if it is an array section 5330 // whose length can't be proved to be one. If this is a pointer, it 5331 // becomes the base address for the following components. 5332 5333 // A final array section, is one whose length can't be proved to be one. 5334 bool IsFinalArraySection = 5335 isFinalArraySectionExpression(I->getAssociatedExpression()); 5336 5337 // Get information on whether the element is a pointer. Have to do a 5338 // special treatment for array sections given that they are built-in 5339 // types. 
5340 const auto *OASE = 5341 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 5342 bool IsPointer = 5343 (OASE && 5344 OMPArraySectionExpr::getBaseOriginalType(OASE) 5345 .getCanonicalType() 5346 ->isAnyPointerType()) || 5347 I->getAssociatedExpression()->getType()->isAnyPointerType(); 5348 5349 if (Next == CE || IsPointer || IsFinalArraySection) { 5350 5351 // If this is not the last component, we expect the pointer to be 5352 // associated with an array expression or member expression. 5353 assert((Next == CE || 5354 isa<MemberExpr>(Next->getAssociatedExpression()) || 5355 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 5356 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && 5357 "Unexpected expression"); 5358 5359 auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer(); 5360 auto *Size = getExprTypeSize(I->getAssociatedExpression()); 5361 5362 // If we have a member expression and the current component is a 5363 // reference, we have to map the reference too. Whenever we have a 5364 // reference, the section that reference refers to is going to be a 5365 // load instruction from the storage assigned to the reference. 5366 if (isa<MemberExpr>(I->getAssociatedExpression()) && 5367 I->getAssociatedDeclaration()->getType()->isReferenceType()) { 5368 auto *LI = cast<llvm::LoadInst>(LB); 5369 auto *RefAddr = LI->getPointerOperand(); 5370 5371 BasePointers.push_back(BP); 5372 Pointers.push_back(RefAddr); 5373 Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); 5374 Types.push_back(getMapTypeBits( 5375 /*MapType*/ OMPC_MAP_alloc, /*MapTypeModifier=*/OMPC_MAP_unknown, 5376 !IsExpressionFirstInfo, IsCaptureFirstInfo)); 5377 IsExpressionFirstInfo = false; 5378 IsCaptureFirstInfo = false; 5379 // The reference will be the next base address. 
5380 BP = RefAddr; 5381 } 5382 5383 BasePointers.push_back(BP); 5384 Pointers.push_back(LB); 5385 Sizes.push_back(Size); 5386 5387 // We need to add a pointer flag for each map that comes from the 5388 // same expression except for the first one. We also need to signal 5389 // this map is the first one that relates with the current capture 5390 // (there is a set of entries for each capture). 5391 Types.push_back(getMapTypeBits(MapType, MapTypeModifier, 5392 !IsExpressionFirstInfo, 5393 IsCaptureFirstInfo)); 5394 5395 // If we have a final array section, we are done with this expression. 5396 if (IsFinalArraySection) 5397 break; 5398 5399 // The pointer becomes the base for the next element. 5400 if (Next != CE) 5401 BP = LB; 5402 5403 IsExpressionFirstInfo = false; 5404 IsCaptureFirstInfo = false; 5405 continue; 5406 } 5407 } 5408 } 5409 5410 /// \brief Return the adjusted map modifiers if the declaration a capture 5411 /// refers to appears in a first-private clause. This is expected to be used 5412 /// only with directives that start with 'target'. 5413 unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap, 5414 unsigned CurrentModifiers) { 5415 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 5416 5417 // A first private variable captured by reference will use only the 5418 // 'private ptr' and 'map to' flag. Return the right flags if the captured 5419 // declaration is known as first-private in this handler. 5420 if (FirstPrivateDecls.count(Cap.getCapturedVar())) 5421 return MappableExprsHandler::OMP_MAP_PRIVATE_PTR | 5422 MappableExprsHandler::OMP_MAP_TO; 5423 5424 // We didn't modify anything. 5425 return CurrentModifiers; 5426 } 5427 5428 public: 5429 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 5430 : CurDir(Dir), CGF(CGF) { 5431 // Extract firstprivate clause information. 
5432 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 5433 for (const auto *D : C->varlists()) 5434 FirstPrivateDecls.insert( 5435 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); 5436 // Extract device pointer clause information. 5437 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 5438 for (auto L : C->component_lists()) 5439 DevPointersMap[L.first].push_back(L.second); 5440 } 5441 5442 /// \brief Generate all the base pointers, section pointers, sizes and map 5443 /// types for the extracted mappable expressions. Also, for each item that 5444 /// relates with a device pointer, a pair of the relevant declaration and 5445 /// index where it occurs is appended to the device pointers info array. 5446 void generateAllInfo(MapBaseValuesArrayTy &BasePointers, 5447 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 5448 MapFlagsArrayTy &Types) const { 5449 BasePointers.clear(); 5450 Pointers.clear(); 5451 Sizes.clear(); 5452 Types.clear(); 5453 5454 struct MapInfo { 5455 /// Kind that defines how a device pointer has to be returned. 5456 enum ReturnPointerKind { 5457 // Don't have to return any pointer. 5458 RPK_None, 5459 // Pointer is the base of the declaration. 
5460 RPK_Base, 5461 // Pointer is a member of the base declaration - 'this' 5462 RPK_Member, 5463 // Pointer is a reference and a member of the base declaration - 'this' 5464 RPK_MemberReference, 5465 }; 5466 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 5467 OpenMPMapClauseKind MapType; 5468 OpenMPMapClauseKind MapTypeModifier; 5469 ReturnPointerKind ReturnDevicePointer; 5470 5471 MapInfo() 5472 : MapType(OMPC_MAP_unknown), MapTypeModifier(OMPC_MAP_unknown), 5473 ReturnDevicePointer(RPK_None) {} 5474 MapInfo( 5475 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 5476 OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, 5477 ReturnPointerKind ReturnDevicePointer) 5478 : Components(Components), MapType(MapType), 5479 MapTypeModifier(MapTypeModifier), 5480 ReturnDevicePointer(ReturnDevicePointer) {} 5481 }; 5482 5483 // We have to process the component lists that relate with the same 5484 // declaration in a single chunk so that we can generate the map flags 5485 // correctly. Therefore, we organize all lists in a map. 5486 llvm::DenseMap<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 5487 5488 // Helper function to fill the information map for the different supported 5489 // clauses. 5490 auto &&InfoGen = [&Info]( 5491 const ValueDecl *D, 5492 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 5493 OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier, 5494 MapInfo::ReturnPointerKind ReturnDevicePointer) { 5495 const ValueDecl *VD = 5496 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 5497 Info[VD].push_back({L, MapType, MapModifier, ReturnDevicePointer}); 5498 }; 5499 5500 // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 
5501 for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) 5502 for (auto L : C->component_lists()) 5503 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(), 5504 MapInfo::RPK_None); 5505 for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>()) 5506 for (auto L : C->component_lists()) 5507 InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown, 5508 MapInfo::RPK_None); 5509 for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>()) 5510 for (auto L : C->component_lists()) 5511 InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown, 5512 MapInfo::RPK_None); 5513 5514 // Look at the use_device_ptr clause information and mark the existing map 5515 // entries as such. If there is no map information for an entry in the 5516 // use_device_ptr list, we create one with map type 'alloc' and zero size 5517 // section. It is the user fault if that was not mapped before. 5518 // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 5519 for (auto *C : this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) 5520 for (auto L : C->component_lists()) { 5521 assert(!L.second.empty() && "Not expecting empty list of components!"); 5522 const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); 5523 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 5524 auto *IE = L.second.back().getAssociatedExpression(); 5525 // If the first component is a member expression, we have to look into 5526 // 'this', which maps to null in the map of map information. Otherwise 5527 // look directly for the information. 5528 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 5529 5530 // We potentially have map information for this declaration already. 5531 // Look for the first set of components that refer to it. 
5532 if (It != Info.end()) { 5533 auto CI = std::find_if( 5534 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { 5535 return MI.Components.back().getAssociatedDeclaration() == VD; 5536 }); 5537 // If we found a map entry, signal that the pointer has to be returned 5538 // and move on to the next declaration. 5539 if (CI != It->second.end()) { 5540 CI->ReturnDevicePointer = isa<MemberExpr>(IE) 5541 ? (VD->getType()->isReferenceType() 5542 ? MapInfo::RPK_MemberReference 5543 : MapInfo::RPK_Member) 5544 : MapInfo::RPK_Base; 5545 continue; 5546 } 5547 } 5548 5549 // We didn't find any match in our map information - generate a zero 5550 // size array section. 5551 // FIXME: MSVC 2013 seems to require this-> to find member CGF. 5552 llvm::Value *Ptr = 5553 this->CGF 5554 .EmitLoadOfLValue(this->CGF.EmitLValue(IE), SourceLocation()) 5555 .getScalarVal(); 5556 BasePointers.push_back({Ptr, VD}); 5557 Pointers.push_back(Ptr); 5558 Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy)); 5559 Types.push_back(OMP_MAP_RETURN_PTR | OMP_MAP_FIRST_REF); 5560 } 5561 5562 for (auto &M : Info) { 5563 // We need to know when we generate information for the first component 5564 // associated with a capture, because the mapping flags depend on it. 5565 bool IsFirstComponentList = true; 5566 for (MapInfo &L : M.second) { 5567 assert(!L.Components.empty() && 5568 "Not expecting declaration with no component lists."); 5569 5570 // Remember the current base pointer index. 5571 unsigned CurrentBasePointersIdx = BasePointers.size(); 5572 // FIXME: MSVC 2013 seems to require this-> to find the member method. 5573 this->generateInfoForComponentList(L.MapType, L.MapTypeModifier, 5574 L.Components, BasePointers, Pointers, 5575 Sizes, Types, IsFirstComponentList); 5576 5577 // If this entry relates with a device pointer, set the relevant 5578 // declaration and add the 'return pointer' flag. 
5579 if (IsFirstComponentList && 5580 L.ReturnDevicePointer != MapInfo::RPK_None) { 5581 // If the pointer is not the base of the map, we need to skip the 5582 // base. If it is a reference in a member field, we also need to skip 5583 // the map of the reference. 5584 if (L.ReturnDevicePointer != MapInfo::RPK_Base) { 5585 ++CurrentBasePointersIdx; 5586 if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference) 5587 ++CurrentBasePointersIdx; 5588 } 5589 assert(BasePointers.size() > CurrentBasePointersIdx && 5590 "Unexpected number of mapped base pointers."); 5591 5592 auto *RelevantVD = L.Components.back().getAssociatedDeclaration(); 5593 assert(RelevantVD && 5594 "No relevant declaration related with device pointer??"); 5595 5596 BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); 5597 Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PTR; 5598 } 5599 IsFirstComponentList = false; 5600 } 5601 } 5602 } 5603 5604 /// \brief Generate the base pointers, section pointers, sizes and map types 5605 /// associated to a given capture. 5606 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 5607 llvm::Value *Arg, 5608 MapBaseValuesArrayTy &BasePointers, 5609 MapValuesArrayTy &Pointers, 5610 MapValuesArrayTy &Sizes, 5611 MapFlagsArrayTy &Types) const { 5612 assert(!Cap->capturesVariableArrayType() && 5613 "Not expecting to generate map info for a variable array type!"); 5614 5615 BasePointers.clear(); 5616 Pointers.clear(); 5617 Sizes.clear(); 5618 Types.clear(); 5619 5620 // We need to know when we generating information for the first component 5621 // associated with a capture, because the mapping flags depend on it. 5622 bool IsFirstComponentList = true; 5623 5624 const ValueDecl *VD = 5625 Cap->capturesThis() 5626 ? nullptr 5627 : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl()); 5628 5629 // If this declaration appears in a is_device_ptr clause we just have to 5630 // pass the pointer by value. 
If it is a reference to a declaration, we just 5631 // pass its value, otherwise, if it is a member expression, we need to map 5632 // 'to' the field. 5633 if (!VD) { 5634 auto It = DevPointersMap.find(VD); 5635 if (It != DevPointersMap.end()) { 5636 for (auto L : It->second) { 5637 generateInfoForComponentList( 5638 /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L, 5639 BasePointers, Pointers, Sizes, Types, IsFirstComponentList); 5640 IsFirstComponentList = false; 5641 } 5642 return; 5643 } 5644 } else if (DevPointersMap.count(VD)) { 5645 BasePointers.push_back({Arg, VD}); 5646 Pointers.push_back(Arg); 5647 Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); 5648 Types.push_back(OMP_MAP_PRIVATE_VAL | OMP_MAP_FIRST_REF); 5649 return; 5650 } 5651 5652 // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 5653 for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) 5654 for (auto L : C->decl_component_lists(VD)) { 5655 assert(L.first == VD && 5656 "We got information for the wrong declaration??"); 5657 assert(!L.second.empty() && 5658 "Not expecting declaration with no component lists."); 5659 generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(), 5660 L.second, BasePointers, Pointers, Sizes, 5661 Types, IsFirstComponentList); 5662 IsFirstComponentList = false; 5663 } 5664 5665 return; 5666 } 5667 5668 /// \brief Generate the default map information for a given capture \a CI, 5669 /// record field declaration \a RI and captured value \a CV. 5670 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 5671 const FieldDecl &RI, llvm::Value *CV, 5672 MapBaseValuesArrayTy &CurBasePointers, 5673 MapValuesArrayTy &CurPointers, 5674 MapValuesArrayTy &CurSizes, 5675 MapFlagsArrayTy &CurMapTypes) { 5676 5677 // Do the default mapping. 
5678 if (CI.capturesThis()) { 5679 CurBasePointers.push_back(CV); 5680 CurPointers.push_back(CV); 5681 const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 5682 CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType())); 5683 // Default map type. 5684 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 5685 } else if (CI.capturesVariableByCopy()) { 5686 CurBasePointers.push_back(CV); 5687 CurPointers.push_back(CV); 5688 if (!RI.getType()->isAnyPointerType()) { 5689 // We have to signal to the runtime captures passed by value that are 5690 // not pointers. 5691 CurMapTypes.push_back(OMP_MAP_PRIVATE_VAL); 5692 CurSizes.push_back(CGF.getTypeSize(RI.getType())); 5693 } else { 5694 // Pointers are implicitly mapped with a zero size and no flags 5695 // (other than first map that is added for all implicit maps). 5696 CurMapTypes.push_back(0u); 5697 CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy)); 5698 } 5699 } else { 5700 assert(CI.capturesVariable() && "Expected captured reference."); 5701 CurBasePointers.push_back(CV); 5702 CurPointers.push_back(CV); 5703 5704 const ReferenceType *PtrTy = 5705 cast<ReferenceType>(RI.getType().getTypePtr()); 5706 QualType ElementType = PtrTy->getPointeeType(); 5707 CurSizes.push_back(CGF.getTypeSize(ElementType)); 5708 // The default map type for a scalar/complex type is 'to' because by 5709 // default the value doesn't have to be retrieved. For an aggregate 5710 // type, the default is 'tofrom'. 5711 CurMapTypes.push_back(ElementType->isAggregateType() 5712 ? (OMP_MAP_TO | OMP_MAP_FROM) 5713 : OMP_MAP_TO); 5714 5715 // If we have a capture by reference we may need to add the private 5716 // pointer flag if the base declaration shows in some first-private 5717 // clause. 5718 CurMapTypes.back() = 5719 adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back()); 5720 } 5721 // Every default map produces a single argument, so, it is always the 5722 // first one. 
5723 CurMapTypes.back() |= OMP_MAP_FIRST_REF; 5724 } 5725 }; 5726 5727 enum OpenMPOffloadingReservedDeviceIDs { 5728 /// \brief Device ID if the device was not defined, runtime should get it 5729 /// from environment variables in the spec. 5730 OMP_DEVICEID_UNDEF = -1, 5731 }; 5732 } // anonymous namespace 5733 5734 /// \brief Emit the arrays used to pass the captures and map information to the 5735 /// offloading runtime library. If there is no map or capture information, 5736 /// return nullptr by reference. 5737 static void 5738 emitOffloadingArrays(CodeGenFunction &CGF, 5739 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 5740 MappableExprsHandler::MapValuesArrayTy &Pointers, 5741 MappableExprsHandler::MapValuesArrayTy &Sizes, 5742 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 5743 CGOpenMPRuntime::TargetDataInfo &Info) { 5744 auto &CGM = CGF.CGM; 5745 auto &Ctx = CGF.getContext(); 5746 5747 // Reset the array information. 5748 Info.clearArrayInfo(); 5749 Info.NumberOfPtrs = BasePointers.size(); 5750 5751 if (Info.NumberOfPtrs) { 5752 // Detect if we have any capture size requiring runtime evaluation of the 5753 // size so that a constant array could be eventually used. 
bool hasRuntimeEvaluationCaptureSize = false;
    for (auto *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    // Both pointer arrays are temporaries of type 'void *[NumberOfPtrs]'.
    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType =
        Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (auto S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      // The sizes live in a private constant global named ".offload_sizes".
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, ".offload_sizes");
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, ".offload_maptypes");
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Store every base pointer, pointer and (when not constant) size into its
    // slot of the arrays created above.
    for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) {
      // Base pointer: normalize the value to 'void *' before storing it.
      llvm::Value *BPVal = *BasePointers[i];
      if (BPVal->getType()->isPointerTy())
        BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
      else {
        assert(BPVal->getType()->isIntegerTy() &&
               "If not a pointer, the value type must be an integer.");
        BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
      }
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, i);
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember the slot address of entries tied to a device pointer
      // declaration, but only if the caller asked for that information.
      if (Info.requiresDevicePointerInfo())
        if (auto *DevVD = BasePointers[i].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr));

      // Pointer: same normalization and store as for the base pointer.
      llvm::Value *PVal = Pointers[i];
      if (PVal->getType()->isPointerTy())
        PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
      else {
        assert(PVal->getType()->isIntegerTy() &&
               "If not a pointer, the value type must be an integer.");
        PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
      }
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, i);
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Sizes only need stores when the sizes array is a temporary; the
      // constant global already carries its values.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/i);
        Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
/// \brief Emit the arguments to be passed to the runtime library based on the
/// arrays of pointers, sizes and map types.
///
/// When \a Info holds at least one pointer, each output argument is set to the
/// address of element 0 of the corresponding array in \a Info; otherwise each
/// one is set to a null pointer constant of the matching pointer type.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
  auto &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int32Ty, Info.NumberOfPtrs),
        Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
  } else {
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
  }
}

void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Value *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device,
                                     ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  auto &Ctx = CGF.getContext();

  // Fill up the arrays with all the captured variables.
  MappableExprsHandler::MapValuesArrayTy KernelArgs;
  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
  MappableExprsHandler::MapValuesArrayTy Pointers;
  MappableExprsHandler::MapValuesArrayTy Sizes;
  MappableExprsHandler::MapFlagsArrayTy MapTypes;

  // Scratch arrays holding the entries of the capture currently processed.
  MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
  MappableExprsHandler::MapValuesArrayTy CurPointers;
  MappableExprsHandler::MapValuesArrayTy CurSizes;
  MappableExprsHandler::MapFlagsArrayTy CurMapTypes;

  // Get mappable expression information.
  MappableExprsHandler MEHandler(D, CGF);

  // Walk the captures, the fields of the captured record and the captured
  // values in lockstep.
  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    // NOTE(review): Name and Ty are not referenced anywhere in this loop.
    StringRef Name;
    QualType Ty;

    CurBasePointers.clear();
    CurPointers.clear();
    CurSizes.clear();
    CurMapTypes.clear();

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
    if (CI->capturesVariableArrayType()) {
      CurBasePointers.push_back(*CV);
      CurPointers.push_back(*CV);
      CurSizes.push_back(CGF.getTypeSize(RI->getType()));
      // Copy to the device as an argument. No need to retrieve it.
CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL |
                            MappableExprsHandler::OMP_MAP_FIRST_REF);
    } else {
      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
      MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                       CurSizes, CurMapTypes);
      if (CurBasePointers.empty())
        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                         CurPointers, CurSizes, CurMapTypes);
    }
    // We expect to have at least an element of information for this capture.
    assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!");
    assert(CurBasePointers.size() == CurPointers.size() &&
           CurBasePointers.size() == CurSizes.size() &&
           CurBasePointers.size() == CurMapTypes.size() &&
           "Inconsistent map information sizes!");

    // The kernel args are always the first elements of the base pointers
    // associated with a capture.
    KernelArgs.push_back(*CurBasePointers.front());
    // We need to append the results of this capture to what we already have.
    BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
    Pointers.append(CurPointers.begin(), CurPointers.end());
    Sizes.append(CurSizes.begin(), CurSizes.end());
    MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
  }

  // Keep track of whether the host function has to be executed. The flag is a
  // signed 32-bit temporary that starts out as zero.
  auto OffloadErrorQType =
      Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
  auto OffloadError = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
      OffloadErrorQType);
  CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
                        OffloadError);

  // Fill up the pointer arrays and transfer execution to the device.
  // NOTE(review): Ctx and OffloadErrorQType are captured but not used inside
  // the lambda body.
  auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, Device,
                    OutlinedFnID, OffloadError, OffloadErrorQType,
                    &D](CodeGenFunction &CGF, PrePostActionTy &) {
    auto &RT = CGF.CGM.getOpenMPRuntime();
    // Emit the offloading arrays.
    TargetDataInfo Info;
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int32Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size());

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D);
    auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D);

    // If we have NumTeams defined this means that we have an enclosed teams
    // region. Therefore we also expect to have ThreadLimit defined. These two
    // values should be defined in the presence of a teams directive, regardless
    // of having any clauses associated. If the user is using teams but no
    // clauses, these two values will be the default that should be passed to
    // the runtime library - a 32-bit integer with the value zero.
    if (NumTeams) {
      assert(ThreadLimit && "Thread limit expression should be available along "
                            "with number of teams.");
      llvm::Value *OffloadingArgs[] = {
          DeviceID,           OutlinedFnID,
          PointerNum,         Info.BasePointersArray,
          Info.PointersArray, Info.SizesArray,
          Info.MapTypesArray, NumTeams,
          ThreadLimit};
      Return = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {
          DeviceID,           OutlinedFnID,
          PointerNum,         Info.BasePointersArray,
          Info.PointersArray, Info.SizesArray,
          Info.MapTypesArray};
      Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target),
                                   OffloadingArgs);
    }

    // The runtime's return value becomes the flag tested after the call: any
    // non-zero value makes the host version run.
    CGF.EmitStoreOfScalar(Return, OffloadError);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u),
                          OffloadError);
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
6056 if (OutlinedFnID) { 6057 if (IfCond) 6058 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 6059 else { 6060 RegionCodeGenTy ThenRCG(ThenGen); 6061 ThenRCG(CGF); 6062 } 6063 } else { 6064 RegionCodeGenTy ElseRCG(ElseGen); 6065 ElseRCG(CGF); 6066 } 6067 6068 // Check the error code and execute the host version if required. 6069 auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed"); 6070 auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont"); 6071 auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation()); 6072 auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal); 6073 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 6074 6075 CGF.EmitBlock(OffloadFailedBlock); 6076 CGF.Builder.CreateCall(OutlinedFn, KernelArgs); 6077 CGF.EmitBranch(OffloadContBlock); 6078 6079 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 6080 } 6081 6082 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 6083 StringRef ParentName) { 6084 if (!S) 6085 return; 6086 6087 // If we find a OMP target directive, codegen the outline function and 6088 // register the result. 6089 // FIXME: Add other directives with target when they become supported. 6090 bool isTargetDirective = isa<OMPTargetDirective>(S); 6091 6092 if (isTargetDirective) { 6093 auto *E = cast<OMPExecutableDirective>(S); 6094 unsigned DeviceID; 6095 unsigned FileID; 6096 unsigned Line; 6097 getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID, 6098 FileID, Line); 6099 6100 // Is this a target region that should not be emitted as an entry point? If 6101 // so just signal we are done with this target region. 
6102 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 6103 ParentName, Line)) 6104 return; 6105 6106 llvm::Function *Fn; 6107 llvm::Constant *Addr; 6108 std::tie(Fn, Addr) = 6109 CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction( 6110 CGM, cast<OMPTargetDirective>(*E), ParentName, 6111 /*isOffloadEntry=*/true); 6112 assert(Fn && Addr && "Target region emission failed."); 6113 return; 6114 } 6115 6116 if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) { 6117 if (!E->hasAssociatedStmt()) 6118 return; 6119 6120 scanForTargetRegionsFunctions( 6121 cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(), 6122 ParentName); 6123 return; 6124 } 6125 6126 // If this is a lambda function, look into its body. 6127 if (auto *L = dyn_cast<LambdaExpr>(S)) 6128 S = L->getBody(); 6129 6130 // Keep looking for target regions recursively. 6131 for (auto *II : S->children()) 6132 scanForTargetRegionsFunctions(II, ParentName); 6133 } 6134 6135 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 6136 auto &FD = *cast<FunctionDecl>(GD.getDecl()); 6137 6138 // If emitting code for the host, we do not process FD here. Instead we do 6139 // the normal code generation. 6140 if (!CGM.getLangOpts().OpenMPIsDevice) 6141 return false; 6142 6143 // Try to detect target regions in the function. 6144 scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD)); 6145 6146 // We should not emit any function othen that the ones created during the 6147 // scanning. Therefore, we signal that this function is completely dealt 6148 // with. 6149 return true; 6150 } 6151 6152 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 6153 if (!CGM.getLangOpts().OpenMPIsDevice) 6154 return false; 6155 6156 // Check if there are Ctors/Dtors in this declaration and look for target 6157 // regions in it. We use the complete variant to produce the kernel name 6158 // mangling. 
6159 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 6160 if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 6161 for (auto *Ctor : RD->ctors()) { 6162 StringRef ParentName = 6163 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 6164 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 6165 } 6166 auto *Dtor = RD->getDestructor(); 6167 if (Dtor) { 6168 StringRef ParentName = 6169 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 6170 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 6171 } 6172 } 6173 6174 // If we are in target mode we do not emit any global (declare target is not 6175 // implemented yet). Therefore we signal that GD was processed in this case. 6176 return true; 6177 } 6178 6179 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 6180 auto *VD = GD.getDecl(); 6181 if (isa<FunctionDecl>(VD)) 6182 return emitTargetFunctions(GD); 6183 6184 return emitTargetGlobalVariable(GD); 6185 } 6186 6187 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { 6188 // If we have offloading in the current module, we need to emit the entries 6189 // now and register the offloading descriptor. 6190 createOffloadEntriesAndInfoMetadata(); 6191 6192 // Create and register the offloading binary descriptors. This is the main 6193 // entity that captures all the information about offloading in the current 6194 // compilation unit. 
6195 return createOffloadingBinaryDescriptorRegistration(); 6196 } 6197 6198 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 6199 const OMPExecutableDirective &D, 6200 SourceLocation Loc, 6201 llvm::Value *OutlinedFn, 6202 ArrayRef<llvm::Value *> CapturedVars) { 6203 if (!CGF.HaveInsertPoint()) 6204 return; 6205 6206 auto *RTLoc = emitUpdateLocation(CGF, Loc); 6207 CodeGenFunction::RunCleanupsScope Scope(CGF); 6208 6209 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 6210 llvm::Value *Args[] = { 6211 RTLoc, 6212 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 6213 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 6214 llvm::SmallVector<llvm::Value *, 16> RealArgs; 6215 RealArgs.append(std::begin(Args), std::end(Args)); 6216 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 6217 6218 auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 6219 CGF.EmitRuntimeCall(RTLFn, RealArgs); 6220 } 6221 6222 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 6223 const Expr *NumTeams, 6224 const Expr *ThreadLimit, 6225 SourceLocation Loc) { 6226 if (!CGF.HaveInsertPoint()) 6227 return; 6228 6229 auto *RTLoc = emitUpdateLocation(CGF, Loc); 6230 6231 llvm::Value *NumTeamsVal = 6232 (NumTeams) 6233 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 6234 CGF.CGM.Int32Ty, /* isSigned = */ true) 6235 : CGF.Builder.getInt32(0); 6236 6237 llvm::Value *ThreadLimitVal = 6238 (ThreadLimit) 6239 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 6240 CGF.CGM.Int32Ty, /* isSigned = */ true) 6241 : CGF.Builder.getInt32(0); 6242 6243 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 6244 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 6245 ThreadLimitVal}; 6246 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 6247 PushNumTeamsArgs); 6248 } 6249 6250 void CGOpenMPRuntime::emitTargetDataCalls( 6251 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 6252 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 6253 if (!CGF.HaveInsertPoint()) 6254 return; 6255 6256 // Action used to replace the default codegen action and turn privatization 6257 // off. 6258 PrePostActionTy NoPrivAction; 6259 6260 // Generate the code for the opening of the data environment. Capture all the 6261 // arguments of the runtime call by reference because they are used in the 6262 // closing of the region. 6263 auto &&BeginThenGen = [&D, &CGF, Device, &Info, &CodeGen, &NoPrivAction]( 6264 CodeGenFunction &CGF, PrePostActionTy &) { 6265 // Fill up the arrays with all the mapped variables. 6266 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 6267 MappableExprsHandler::MapValuesArrayTy Pointers; 6268 MappableExprsHandler::MapValuesArrayTy Sizes; 6269 MappableExprsHandler::MapFlagsArrayTy MapTypes; 6270 6271 // Get map clause information. 6272 MappableExprsHandler MCHandler(D, CGF); 6273 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 6274 6275 // Fill up the arrays and create the arguments. 
emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int32Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);

    // Emit the number of elements in the offloading arrays.
    auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    auto &RT = CGF.CGM.getOpenMPRuntime();
    CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };
  // NOTE(review): BeginThenGen captures the outer CGF and NoPrivAction, but
  // its CGF parameter shadows the former and the latter is not referenced in
  // its body.

  // Generate code for the closing of the data region. The arrays stored in
  // Info by the opening code are reused; only the argument values are emitted
  // again.
  auto &&EndThenGen = [&CGF, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int32Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);

    // Emit the number of elements in the offloading arrays.
    auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    auto &RT = CGF.CGM.getOpenMPRuntime();
    CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment, guarded by the if clause when there is one.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment under the same condition used to open it.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    TargetDataInfo Info;
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int32Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);

    // Emit the number of elements in the offloading arrays.
6416 auto *PointerNum = CGF.Builder.getInt32(BasePointers.size()); 6417 6418 llvm::Value *OffloadingArgs[] = { 6419 DeviceID, PointerNum, Info.BasePointersArray, 6420 Info.PointersArray, Info.SizesArray, Info.MapTypesArray}; 6421 6422 auto &RT = CGF.CGM.getOpenMPRuntime(); 6423 // Select the right runtime function call for each expected standalone 6424 // directive. 6425 OpenMPRTLFunction RTLFn; 6426 switch (D.getDirectiveKind()) { 6427 default: 6428 llvm_unreachable("Unexpected standalone target data directive."); 6429 break; 6430 case OMPD_target_enter_data: 6431 RTLFn = OMPRTL__tgt_target_data_begin; 6432 break; 6433 case OMPD_target_exit_data: 6434 RTLFn = OMPRTL__tgt_target_data_end; 6435 break; 6436 case OMPD_target_update: 6437 RTLFn = OMPRTL__tgt_target_data_update; 6438 break; 6439 } 6440 CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs); 6441 }; 6442 6443 // In the event we get an if clause, we don't have to take any action on the 6444 // else side. 6445 auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 6446 6447 if (IfCond) { 6448 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 6449 } else { 6450 RegionCodeGenTy ThenGenRCG(ThenGen); 6451 ThenGenRCG(CGF); 6452 } 6453 } 6454 6455 namespace { 6456 /// Kind of parameter in a function with 'declare simd' directive. 6457 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 6458 /// Attribute set of the parameter. 6459 struct ParamAttrTy { 6460 ParamKindTy Kind = Vector; 6461 llvm::APSInt StrideOrArg; 6462 llvm::APSInt Alignment; 6463 }; 6464 } // namespace 6465 6466 static unsigned evaluateCDTSize(const FunctionDecl *FD, 6467 ArrayRef<ParamAttrTy> ParamAttrs) { 6468 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 6469 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 6470 // of that clause. The VLEN value must be power of 2. 
6471 // In other case the notion of the function`s "characteristic data type" (CDT) 6472 // is used to compute the vector length. 6473 // CDT is defined in the following order: 6474 // a) For non-void function, the CDT is the return type. 6475 // b) If the function has any non-uniform, non-linear parameters, then the 6476 // CDT is the type of the first such parameter. 6477 // c) If the CDT determined by a) or b) above is struct, union, or class 6478 // type which is pass-by-value (except for the type that maps to the 6479 // built-in complex data type), the characteristic data type is int. 6480 // d) If none of the above three cases is applicable, the CDT is int. 6481 // The VLEN is then determined based on the CDT and the size of vector 6482 // register of that ISA for which current vector version is generated. The 6483 // VLEN is computed using the formula below: 6484 // VLEN = sizeof(vector_register) / sizeof(CDT), 6485 // where vector register size specified in section 3.2.1 Registers and the 6486 // Stack Frame of original AMD64 ABI document. 
6487 QualType RetType = FD->getReturnType(); 6488 if (RetType.isNull()) 6489 return 0; 6490 ASTContext &C = FD->getASTContext(); 6491 QualType CDT; 6492 if (!RetType.isNull() && !RetType->isVoidType()) 6493 CDT = RetType; 6494 else { 6495 unsigned Offset = 0; 6496 if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 6497 if (ParamAttrs[Offset].Kind == Vector) 6498 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 6499 ++Offset; 6500 } 6501 if (CDT.isNull()) { 6502 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 6503 if (ParamAttrs[I + Offset].Kind == Vector) { 6504 CDT = FD->getParamDecl(I)->getType(); 6505 break; 6506 } 6507 } 6508 } 6509 } 6510 if (CDT.isNull()) 6511 CDT = C.IntTy; 6512 CDT = CDT->getCanonicalTypeUnqualified(); 6513 if (CDT->isRecordType() || CDT->isUnionType()) 6514 CDT = C.IntTy; 6515 return C.getTypeSize(CDT); 6516 } 6517 6518 static void 6519 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 6520 const llvm::APSInt &VLENVal, 6521 ArrayRef<ParamAttrTy> ParamAttrs, 6522 OMPDeclareSimdDeclAttr::BranchStateTy State) { 6523 struct ISADataTy { 6524 char ISA; 6525 unsigned VecRegSize; 6526 }; 6527 ISADataTy ISAData[] = { 6528 { 6529 'b', 128 6530 }, // SSE 6531 { 6532 'c', 256 6533 }, // AVX 6534 { 6535 'd', 256 6536 }, // AVX2 6537 { 6538 'e', 512 6539 }, // AVX512 6540 }; 6541 llvm::SmallVector<char, 2> Masked; 6542 switch (State) { 6543 case OMPDeclareSimdDeclAttr::BS_Undefined: 6544 Masked.push_back('N'); 6545 Masked.push_back('M'); 6546 break; 6547 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 6548 Masked.push_back('N'); 6549 break; 6550 case OMPDeclareSimdDeclAttr::BS_Inbranch: 6551 Masked.push_back('M'); 6552 break; 6553 } 6554 for (auto Mask : Masked) { 6555 for (auto &Data : ISAData) { 6556 SmallString<256> Buffer; 6557 llvm::raw_svector_ostream Out(Buffer); 6558 Out << "_ZGV" << Data.ISA << Mask; 6559 if (!VLENVal) { 6560 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / 6561 evaluateCDTSize(FD, 
ParamAttrs)); 6562 } else 6563 Out << VLENVal; 6564 for (auto &ParamAttr : ParamAttrs) { 6565 switch (ParamAttr.Kind){ 6566 case LinearWithVarStride: 6567 Out << 's' << ParamAttr.StrideOrArg; 6568 break; 6569 case Linear: 6570 Out << 'l'; 6571 if (!!ParamAttr.StrideOrArg) 6572 Out << ParamAttr.StrideOrArg; 6573 break; 6574 case Uniform: 6575 Out << 'u'; 6576 break; 6577 case Vector: 6578 Out << 'v'; 6579 break; 6580 } 6581 if (!!ParamAttr.Alignment) 6582 Out << 'a' << ParamAttr.Alignment; 6583 } 6584 Out << '_' << Fn->getName(); 6585 Fn->addFnAttr(Out.str()); 6586 } 6587 } 6588 } 6589 6590 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 6591 llvm::Function *Fn) { 6592 ASTContext &C = CGM.getContext(); 6593 FD = FD->getCanonicalDecl(); 6594 // Map params to their positions in function decl. 6595 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 6596 if (isa<CXXMethodDecl>(FD)) 6597 ParamPositions.insert({FD, 0}); 6598 unsigned ParamPos = ParamPositions.size(); 6599 for (auto *P : FD->parameters()) { 6600 ParamPositions.insert({P->getCanonicalDecl(), ParamPos}); 6601 ++ParamPos; 6602 } 6603 for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 6604 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 6605 // Mark uniform parameters. 6606 for (auto *E : Attr->uniforms()) { 6607 E = E->IgnoreParenImpCasts(); 6608 unsigned Pos; 6609 if (isa<CXXThisExpr>(E)) 6610 Pos = ParamPositions[FD]; 6611 else { 6612 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 6613 ->getCanonicalDecl(); 6614 Pos = ParamPositions[PVD]; 6615 } 6616 ParamAttrs[Pos].Kind = Uniform; 6617 } 6618 // Get alignment info. 
6619 auto NI = Attr->alignments_begin(); 6620 for (auto *E : Attr->aligneds()) { 6621 E = E->IgnoreParenImpCasts(); 6622 unsigned Pos; 6623 QualType ParmTy; 6624 if (isa<CXXThisExpr>(E)) { 6625 Pos = ParamPositions[FD]; 6626 ParmTy = E->getType(); 6627 } else { 6628 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 6629 ->getCanonicalDecl(); 6630 Pos = ParamPositions[PVD]; 6631 ParmTy = PVD->getType(); 6632 } 6633 ParamAttrs[Pos].Alignment = 6634 (*NI) ? (*NI)->EvaluateKnownConstInt(C) 6635 : llvm::APSInt::getUnsigned( 6636 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 6637 .getQuantity()); 6638 ++NI; 6639 } 6640 // Mark linear parameters. 6641 auto SI = Attr->steps_begin(); 6642 auto MI = Attr->modifiers_begin(); 6643 for (auto *E : Attr->linears()) { 6644 E = E->IgnoreParenImpCasts(); 6645 unsigned Pos; 6646 if (isa<CXXThisExpr>(E)) 6647 Pos = ParamPositions[FD]; 6648 else { 6649 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 6650 ->getCanonicalDecl(); 6651 Pos = ParamPositions[PVD]; 6652 } 6653 auto &ParamAttr = ParamAttrs[Pos]; 6654 ParamAttr.Kind = Linear; 6655 if (*SI) { 6656 if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C, 6657 Expr::SE_AllowSideEffects)) { 6658 if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 6659 if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 6660 ParamAttr.Kind = LinearWithVarStride; 6661 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 6662 ParamPositions[StridePVD->getCanonicalDecl()]); 6663 } 6664 } 6665 } 6666 } 6667 ++SI; 6668 ++MI; 6669 } 6670 llvm::APSInt VLENVal; 6671 if (const Expr *VLEN = Attr->getSimdlen()) 6672 VLENVal = VLEN->EvaluateKnownConstInt(C); 6673 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 6674 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 6675 CGM.getTriple().getArch() == llvm::Triple::x86_64) 6676 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 6677 } 6678 } 6679 6680 namespace { 6681 /// 
Cleanup action for doacross support. 6682 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 6683 public: 6684 static const int DoacrossFinArgs = 2; 6685 6686 private: 6687 llvm::Value *RTLFn; 6688 llvm::Value *Args[DoacrossFinArgs]; 6689 6690 public: 6691 DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs) 6692 : RTLFn(RTLFn) { 6693 assert(CallArgs.size() == DoacrossFinArgs); 6694 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 6695 } 6696 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 6697 if (!CGF.HaveInsertPoint()) 6698 return; 6699 CGF.EmitRuntimeCall(RTLFn, Args); 6700 } 6701 }; 6702 } // namespace 6703 6704 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 6705 const OMPLoopDirective &D) { 6706 if (!CGF.HaveInsertPoint()) 6707 return; 6708 6709 ASTContext &C = CGM.getContext(); 6710 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 6711 RecordDecl *RD; 6712 if (KmpDimTy.isNull()) { 6713 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 6714 // kmp_int64 lo; // lower 6715 // kmp_int64 up; // upper 6716 // kmp_int64 st; // stride 6717 // }; 6718 RD = C.buildImplicitRecord("kmp_dim"); 6719 RD->startDefinition(); 6720 addFieldToRecordDecl(C, RD, Int64Ty); 6721 addFieldToRecordDecl(C, RD, Int64Ty); 6722 addFieldToRecordDecl(C, RD, Int64Ty); 6723 RD->completeDefinition(); 6724 KmpDimTy = C.getRecordType(RD); 6725 } else 6726 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 6727 6728 Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims"); 6729 CGF.EmitNullInitialization(DimsAddr, KmpDimTy); 6730 enum { LowerFD = 0, UpperFD, StrideFD }; 6731 // Fill dims with data. 
6732 LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy); 6733 // dims.upper = num_iterations; 6734 LValue UpperLVal = 6735 CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD)); 6736 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 6737 CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(), 6738 Int64Ty, D.getNumIterations()->getExprLoc()); 6739 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 6740 // dims.stride = 1; 6741 LValue StrideLVal = 6742 CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD)); 6743 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 6744 StrideLVal); 6745 6746 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 6747 // kmp_int32 num_dims, struct kmp_dim * dims); 6748 llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()), 6749 getThreadID(CGF, D.getLocStart()), 6750 llvm::ConstantInt::getSigned(CGM.Int32Ty, 1), 6751 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6752 DimsAddr.getPointer(), CGM.VoidPtrTy)}; 6753 6754 llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init); 6755 CGF.EmitRuntimeCall(RTLFn, Args); 6756 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 6757 emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())}; 6758 llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 6759 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 6760 llvm::makeArrayRef(FiniArgs)); 6761 } 6762 6763 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 6764 const OMPDependClause *C) { 6765 QualType Int64Ty = 6766 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 6767 const Expr *CounterVal = C->getCounterValue(); 6768 assert(CounterVal); 6769 llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal), 6770 CounterVal->getType(), Int64Ty, 6771 CounterVal->getExprLoc()); 6772 Address CntAddr = 
CGF.CreateMemTemp(Int64Ty, ".cnt.addr"); 6773 CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty); 6774 llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()), 6775 getThreadID(CGF, C->getLocStart()), 6776 CntAddr.getPointer()}; 6777 llvm::Value *RTLFn; 6778 if (C->getDependencyKind() == OMPC_DEPEND_source) 6779 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 6780 else { 6781 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 6782 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 6783 } 6784 CGF.EmitRuntimeCall(RTLFn, Args); 6785 } 6786 6787