1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This provides a class for OpenMP runtime code generation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGOpenMPRuntime.h" 17 #include "CodeGenFunction.h" 18 #include "ConstantBuilder.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/StmtOpenMP.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/Bitcode/BitcodeReader.h" 23 #include "llvm/IR/CallSite.h" 24 #include "llvm/IR/DerivedTypes.h" 25 #include "llvm/IR/GlobalValue.h" 26 #include "llvm/IR/Value.h" 27 #include "llvm/Support/Format.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <cassert> 30 31 using namespace clang; 32 using namespace CodeGen; 33 34 namespace { 35 /// \brief Base class for handling code generation inside OpenMP regions. 36 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 37 public: 38 /// \brief Kinds of OpenMP regions used in codegen. 39 enum CGOpenMPRegionKind { 40 /// \brief Region with outlined function for standalone 'parallel' 41 /// directive. 42 ParallelOutlinedRegion, 43 /// \brief Region with outlined function for standalone 'task' directive. 44 TaskOutlinedRegion, 45 /// \brief Region for constructs that do not require function outlining, 46 /// like 'for', 'sections', 'atomic' etc. directives. 47 InlinedRegion, 48 /// \brief Region with outlined function for standalone 'target' directive. 
49 TargetRegion, 50 }; 51 52 CGOpenMPRegionInfo(const CapturedStmt &CS, 53 const CGOpenMPRegionKind RegionKind, 54 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 55 bool HasCancel) 56 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 57 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 58 59 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 60 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 61 bool HasCancel) 62 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 63 Kind(Kind), HasCancel(HasCancel) {} 64 65 /// \brief Get a variable or parameter for storing global thread id 66 /// inside OpenMP construct. 67 virtual const VarDecl *getThreadIDVariable() const = 0; 68 69 /// \brief Emit the captured statement body. 70 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 71 72 /// \brief Get an LValue for the current ThreadID variable. 73 /// \return LValue for thread id variable. This LValue always has type int32*. 74 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 75 76 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 77 78 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 79 80 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 81 82 bool hasCancel() const { return HasCancel; } 83 84 static bool classof(const CGCapturedStmtInfo *Info) { 85 return Info->getKind() == CR_OpenMP; 86 } 87 88 ~CGOpenMPRegionInfo() override = default; 89 90 protected: 91 CGOpenMPRegionKind RegionKind; 92 RegionCodeGenTy CodeGen; 93 OpenMPDirectiveKind Kind; 94 bool HasCancel; 95 }; 96 97 /// \brief API for captured statement code generation in OpenMP constructs. 
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 99 public: 100 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 101 const RegionCodeGenTy &CodeGen, 102 OpenMPDirectiveKind Kind, bool HasCancel) 103 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 104 HasCancel), 105 ThreadIDVar(ThreadIDVar) { 106 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 107 } 108 109 /// \brief Get a variable or parameter for storing global thread id 110 /// inside OpenMP construct. 111 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 112 113 /// \brief Get the name of the capture helper. 114 StringRef getHelperName() const override { return ".omp_outlined."; } 115 116 static bool classof(const CGCapturedStmtInfo *Info) { 117 return CGOpenMPRegionInfo::classof(Info) && 118 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 119 ParallelOutlinedRegion; 120 } 121 122 private: 123 /// \brief A variable or parameter storing global thread id for OpenMP 124 /// constructs. 125 const VarDecl *ThreadIDVar; 126 }; 127 128 /// \brief API for captured statement code generation in OpenMP constructs. 129 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 130 public: 131 class UntiedTaskActionTy final : public PrePostActionTy { 132 bool Untied; 133 const VarDecl *PartIDVar; 134 const RegionCodeGenTy UntiedCodeGen; 135 llvm::SwitchInst *UntiedSwitch = nullptr; 136 137 public: 138 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 139 const RegionCodeGenTy &UntiedCodeGen) 140 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 141 void Enter(CodeGenFunction &CGF) override { 142 if (Untied) { 143 // Emit task switching point. 
144 auto PartIdLVal = CGF.EmitLoadOfPointerLValue( 145 CGF.GetAddrOfLocalVar(PartIDVar), 146 PartIDVar->getType()->castAs<PointerType>()); 147 auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation()); 148 auto *DoneBB = CGF.createBasicBlock(".untied.done."); 149 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 150 CGF.EmitBlock(DoneBB); 151 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 152 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 153 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 154 CGF.Builder.GetInsertBlock()); 155 emitUntiedSwitch(CGF); 156 } 157 } 158 void emitUntiedSwitch(CodeGenFunction &CGF) const { 159 if (Untied) { 160 auto PartIdLVal = CGF.EmitLoadOfPointerLValue( 161 CGF.GetAddrOfLocalVar(PartIDVar), 162 PartIDVar->getType()->castAs<PointerType>()); 163 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 164 PartIdLVal); 165 UntiedCodeGen(CGF); 166 CodeGenFunction::JumpDest CurPoint = 167 CGF.getJumpDestInCurrentScope(".untied.next."); 168 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 169 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 170 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 171 CGF.Builder.GetInsertBlock()); 172 CGF.EmitBranchThroughCleanup(CurPoint); 173 CGF.EmitBlock(CurPoint.getBlock()); 174 } 175 } 176 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } 177 }; 178 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 179 const VarDecl *ThreadIDVar, 180 const RegionCodeGenTy &CodeGen, 181 OpenMPDirectiveKind Kind, bool HasCancel, 182 const UntiedTaskActionTy &Action) 183 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 184 ThreadIDVar(ThreadIDVar), Action(Action) { 185 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 186 } 187 188 /// \brief Get a variable or parameter for storing global thread id 189 /// inside OpenMP construct. 
190 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 191 192 /// \brief Get an LValue for the current ThreadID variable. 193 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 194 195 /// \brief Get the name of the capture helper. 196 StringRef getHelperName() const override { return ".omp_outlined."; } 197 198 void emitUntiedSwitch(CodeGenFunction &CGF) override { 199 Action.emitUntiedSwitch(CGF); 200 } 201 202 static bool classof(const CGCapturedStmtInfo *Info) { 203 return CGOpenMPRegionInfo::classof(Info) && 204 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 205 TaskOutlinedRegion; 206 } 207 208 private: 209 /// \brief A variable or parameter storing global thread id for OpenMP 210 /// constructs. 211 const VarDecl *ThreadIDVar; 212 /// Action for emitting code for untied tasks. 213 const UntiedTaskActionTy &Action; 214 }; 215 216 /// \brief API for inlined captured statement code generation in OpenMP 217 /// constructs. 218 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 219 public: 220 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 221 const RegionCodeGenTy &CodeGen, 222 OpenMPDirectiveKind Kind, bool HasCancel) 223 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 224 OldCSI(OldCSI), 225 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 226 227 // \brief Retrieve the value of the context parameter. 228 llvm::Value *getContextValue() const override { 229 if (OuterRegionInfo) 230 return OuterRegionInfo->getContextValue(); 231 llvm_unreachable("No context value for inlined OpenMP region"); 232 } 233 234 void setContextValue(llvm::Value *V) override { 235 if (OuterRegionInfo) { 236 OuterRegionInfo->setContextValue(V); 237 return; 238 } 239 llvm_unreachable("No context value for inlined OpenMP region"); 240 } 241 242 /// \brief Lookup the captured field decl for a variable. 
243 const FieldDecl *lookup(const VarDecl *VD) const override { 244 if (OuterRegionInfo) 245 return OuterRegionInfo->lookup(VD); 246 // If there is no outer outlined region,no need to lookup in a list of 247 // captured variables, we can use the original one. 248 return nullptr; 249 } 250 251 FieldDecl *getThisFieldDecl() const override { 252 if (OuterRegionInfo) 253 return OuterRegionInfo->getThisFieldDecl(); 254 return nullptr; 255 } 256 257 /// \brief Get a variable or parameter for storing global thread id 258 /// inside OpenMP construct. 259 const VarDecl *getThreadIDVariable() const override { 260 if (OuterRegionInfo) 261 return OuterRegionInfo->getThreadIDVariable(); 262 return nullptr; 263 } 264 265 /// \brief Get the name of the capture helper. 266 StringRef getHelperName() const override { 267 if (auto *OuterRegionInfo = getOldCSI()) 268 return OuterRegionInfo->getHelperName(); 269 llvm_unreachable("No helper name for inlined OpenMP construct"); 270 } 271 272 void emitUntiedSwitch(CodeGenFunction &CGF) override { 273 if (OuterRegionInfo) 274 OuterRegionInfo->emitUntiedSwitch(CGF); 275 } 276 277 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 278 279 static bool classof(const CGCapturedStmtInfo *Info) { 280 return CGOpenMPRegionInfo::classof(Info) && 281 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 282 } 283 284 ~CGOpenMPInlinedRegionInfo() override = default; 285 286 private: 287 /// \brief CodeGen info about outer OpenMP region. 288 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 289 CGOpenMPRegionInfo *OuterRegionInfo; 290 }; 291 292 /// \brief API for captured statement code generation in OpenMP target 293 /// constructs. For this captures, implicit parameters are used instead of the 294 /// captured fields. The name of the target region has to be unique in a given 295 /// application so it is provided by the client, because only the client has 296 /// the information to generate that. 
297 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 298 public: 299 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 300 const RegionCodeGenTy &CodeGen, StringRef HelperName) 301 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 302 /*HasCancel=*/false), 303 HelperName(HelperName) {} 304 305 /// \brief This is unused for target regions because each starts executing 306 /// with a single thread. 307 const VarDecl *getThreadIDVariable() const override { return nullptr; } 308 309 /// \brief Get the name of the capture helper. 310 StringRef getHelperName() const override { return HelperName; } 311 312 static bool classof(const CGCapturedStmtInfo *Info) { 313 return CGOpenMPRegionInfo::classof(Info) && 314 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 315 } 316 317 private: 318 StringRef HelperName; 319 }; 320 321 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 322 llvm_unreachable("No codegen for expressions"); 323 } 324 /// \brief API for generation of expressions captured in a innermost OpenMP 325 /// region. 326 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 327 public: 328 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 329 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 330 OMPD_unknown, 331 /*HasCancel=*/false), 332 PrivScope(CGF) { 333 // Make sure the globals captured in the provided statement are local by 334 // using the privatization logic. We assume the same variable is not 335 // captured more than once. 
336 for (auto &C : CS.captures()) { 337 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 338 continue; 339 340 const VarDecl *VD = C.getCapturedVar(); 341 if (VD->isLocalVarDeclOrParm()) 342 continue; 343 344 DeclRefExpr DRE(const_cast<VarDecl *>(VD), 345 /*RefersToEnclosingVariableOrCapture=*/false, 346 VD->getType().getNonReferenceType(), VK_LValue, 347 SourceLocation()); 348 PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address { 349 return CGF.EmitLValue(&DRE).getAddress(); 350 }); 351 } 352 (void)PrivScope.Privatize(); 353 } 354 355 /// \brief Lookup the captured field decl for a variable. 356 const FieldDecl *lookup(const VarDecl *VD) const override { 357 if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 358 return FD; 359 return nullptr; 360 } 361 362 /// \brief Emit the captured statement body. 363 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 364 llvm_unreachable("No body for expressions"); 365 } 366 367 /// \brief Get a variable or parameter for storing global thread id 368 /// inside OpenMP construct. 369 const VarDecl *getThreadIDVariable() const override { 370 llvm_unreachable("No thread id for expressions"); 371 } 372 373 /// \brief Get the name of the capture helper. 374 StringRef getHelperName() const override { 375 llvm_unreachable("No helper name for expressions"); 376 } 377 378 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 379 380 private: 381 /// Private scope to capture global variables. 382 CodeGenFunction::OMPPrivateScope PrivScope; 383 }; 384 385 /// \brief RAII for emitting code of OpenMP constructs. 386 class InlinedOpenMPRegionRAII { 387 CodeGenFunction &CGF; 388 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 389 FieldDecl *LambdaThisCaptureField = nullptr; 390 391 public: 392 /// \brief Constructs region for combined constructs. 393 /// \param CodeGen Code generation sequence for combined directives. 
Includes 394 /// a list of functions used for code generation of implicitly inlined 395 /// regions. 396 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 397 OpenMPDirectiveKind Kind, bool HasCancel) 398 : CGF(CGF) { 399 // Start emission for the construct. 400 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 401 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 402 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 403 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 404 CGF.LambdaThisCaptureField = nullptr; 405 } 406 407 ~InlinedOpenMPRegionRAII() { 408 // Restore original CapturedStmtInfo only if we're done with code emission. 409 auto *OldCSI = 410 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 411 delete CGF.CapturedStmtInfo; 412 CGF.CapturedStmtInfo = OldCSI; 413 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 414 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 415 } 416 }; 417 418 /// \brief Values for bit flags used in the ident_t to describe the fields. 419 /// All enumeric elements are named and described in accordance with the code 420 /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h 421 enum OpenMPLocationFlags { 422 /// \brief Use trampoline for internal microtask. 423 OMP_IDENT_IMD = 0x01, 424 /// \brief Use c-style ident structure. 425 OMP_IDENT_KMPC = 0x02, 426 /// \brief Atomic reduction option for kmpc_reduce. 427 OMP_ATOMIC_REDUCE = 0x10, 428 /// \brief Explicit 'barrier' directive. 429 OMP_IDENT_BARRIER_EXPL = 0x20, 430 /// \brief Implicit barrier in code. 431 OMP_IDENT_BARRIER_IMPL = 0x40, 432 /// \brief Implicit barrier in 'for' directive. 433 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 434 /// \brief Implicit barrier in 'sections' directive. 435 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 436 /// \brief Implicit barrier in 'single' directive. 
437 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140 438 }; 439 440 /// \brief Describes ident structure that describes a source location. 441 /// All descriptions are taken from 442 /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h 443 /// Original structure: 444 /// typedef struct ident { 445 /// kmp_int32 reserved_1; /**< might be used in Fortran; 446 /// see above */ 447 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 448 /// KMP_IDENT_KMPC identifies this union 449 /// member */ 450 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 451 /// see above */ 452 ///#if USE_ITT_BUILD 453 /// /* but currently used for storing 454 /// region-specific ITT */ 455 /// /* contextual information. */ 456 ///#endif /* USE_ITT_BUILD */ 457 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 458 /// C++ */ 459 /// char const *psource; /**< String describing the source location. 460 /// The string is composed of semi-colon separated 461 // fields which describe the source file, 462 /// the function and a pair of line numbers that 463 /// delimit the construct. 464 /// */ 465 /// } ident_t; 466 enum IdentFieldIndex { 467 /// \brief might be used in Fortran 468 IdentField_Reserved_1, 469 /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 470 IdentField_Flags, 471 /// \brief Not really used in Fortran any more 472 IdentField_Reserved_2, 473 /// \brief Source[4] in Fortran, do not use for C++ 474 IdentField_Reserved_3, 475 /// \brief String describing the source location. The string is composed of 476 /// semi-colon separated fields which describe the source file, the function 477 /// and a pair of line numbers that delimit the construct. 478 IdentField_PSource 479 }; 480 481 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from 482 /// the enum sched_type in kmp.h). 483 enum OpenMPSchedType { 484 /// \brief Lower bound for default (unordered) versions. 
485 OMP_sch_lower = 32, 486 OMP_sch_static_chunked = 33, 487 OMP_sch_static = 34, 488 OMP_sch_dynamic_chunked = 35, 489 OMP_sch_guided_chunked = 36, 490 OMP_sch_runtime = 37, 491 OMP_sch_auto = 38, 492 /// static with chunk adjustment (e.g., simd) 493 OMP_sch_static_balanced_chunked = 45, 494 /// \brief Lower bound for 'ordered' versions. 495 OMP_ord_lower = 64, 496 OMP_ord_static_chunked = 65, 497 OMP_ord_static = 66, 498 OMP_ord_dynamic_chunked = 67, 499 OMP_ord_guided_chunked = 68, 500 OMP_ord_runtime = 69, 501 OMP_ord_auto = 70, 502 OMP_sch_default = OMP_sch_static, 503 /// \brief dist_schedule types 504 OMP_dist_sch_static_chunked = 91, 505 OMP_dist_sch_static = 92, 506 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 507 /// Set if the monotonic schedule modifier was present. 508 OMP_sch_modifier_monotonic = (1 << 29), 509 /// Set if the nonmonotonic schedule modifier was present. 510 OMP_sch_modifier_nonmonotonic = (1 << 30), 511 }; 512 513 enum OpenMPRTLFunction { 514 /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, 515 /// kmpc_micro microtask, ...); 516 OMPRTL__kmpc_fork_call, 517 /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc, 518 /// kmp_int32 global_tid, void *data, size_t size, void ***cache); 519 OMPRTL__kmpc_threadprivate_cached, 520 /// \brief Call to void __kmpc_threadprivate_register( ident_t *, 521 /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 522 OMPRTL__kmpc_threadprivate_register, 523 // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); 524 OMPRTL__kmpc_global_thread_num, 525 // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 526 // kmp_critical_name *crit); 527 OMPRTL__kmpc_critical, 528 // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 529 // global_tid, kmp_critical_name *crit, uintptr_t hint); 530 OMPRTL__kmpc_critical_with_hint, 531 // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 532 // 
kmp_critical_name *crit); 533 OMPRTL__kmpc_end_critical, 534 // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 535 // global_tid); 536 OMPRTL__kmpc_cancel_barrier, 537 // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 538 OMPRTL__kmpc_barrier, 539 // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 540 OMPRTL__kmpc_for_static_fini, 541 // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 542 // global_tid); 543 OMPRTL__kmpc_serialized_parallel, 544 // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 545 // global_tid); 546 OMPRTL__kmpc_end_serialized_parallel, 547 // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 548 // kmp_int32 num_threads); 549 OMPRTL__kmpc_push_num_threads, 550 // Call to void __kmpc_flush(ident_t *loc); 551 OMPRTL__kmpc_flush, 552 // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid); 553 OMPRTL__kmpc_master, 554 // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); 555 OMPRTL__kmpc_end_master, 556 // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 557 // int end_part); 558 OMPRTL__kmpc_omp_taskyield, 559 // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); 560 OMPRTL__kmpc_single, 561 // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); 562 OMPRTL__kmpc_end_single, 563 // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 564 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 565 // kmp_routine_entry_t *task_entry); 566 OMPRTL__kmpc_omp_task_alloc, 567 // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * 568 // new_task); 569 OMPRTL__kmpc_omp_task, 570 // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 571 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 572 // kmp_int32 didit); 573 OMPRTL__kmpc_copyprivate, 574 // Call to kmp_int32 
__kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 575 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 576 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 577 OMPRTL__kmpc_reduce, 578 // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 579 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 580 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 581 // *lck); 582 OMPRTL__kmpc_reduce_nowait, 583 // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 584 // kmp_critical_name *lck); 585 OMPRTL__kmpc_end_reduce, 586 // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 587 // kmp_critical_name *lck); 588 OMPRTL__kmpc_end_reduce_nowait, 589 // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 590 // kmp_task_t * new_task); 591 OMPRTL__kmpc_omp_task_begin_if0, 592 // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 593 // kmp_task_t * new_task); 594 OMPRTL__kmpc_omp_task_complete_if0, 595 // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 596 OMPRTL__kmpc_ordered, 597 // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 598 OMPRTL__kmpc_end_ordered, 599 // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 600 // global_tid); 601 OMPRTL__kmpc_omp_taskwait, 602 // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 603 OMPRTL__kmpc_taskgroup, 604 // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 605 OMPRTL__kmpc_end_taskgroup, 606 // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 607 // int proc_bind); 608 OMPRTL__kmpc_push_proc_bind, 609 // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 610 // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t 611 // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 612 
OMPRTL__kmpc_omp_task_with_deps, 613 // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 614 // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 615 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 616 OMPRTL__kmpc_omp_wait_deps, 617 // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 618 // global_tid, kmp_int32 cncl_kind); 619 OMPRTL__kmpc_cancellationpoint, 620 // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 621 // kmp_int32 cncl_kind); 622 OMPRTL__kmpc_cancel, 623 // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 624 // kmp_int32 num_teams, kmp_int32 thread_limit); 625 OMPRTL__kmpc_push_num_teams, 626 // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 627 // microtask, ...); 628 OMPRTL__kmpc_fork_teams, 629 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 630 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 631 // sched, kmp_uint64 grainsize, void *task_dup); 632 OMPRTL__kmpc_taskloop, 633 // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 634 // num_dims, struct kmp_dim *dims); 635 OMPRTL__kmpc_doacross_init, 636 // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 637 OMPRTL__kmpc_doacross_fini, 638 // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 639 // *vec); 640 OMPRTL__kmpc_doacross_post, 641 // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 642 // *vec); 643 OMPRTL__kmpc_doacross_wait, 644 645 // 646 // Offloading related calls 647 // 648 // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t 649 // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t 650 // *arg_types); 651 OMPRTL__tgt_target, 652 // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, 653 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 654 // 
int32_t *arg_types, int32_t num_teams, int32_t thread_limit); 655 OMPRTL__tgt_target_teams, 656 // Call to void __tgt_register_lib(__tgt_bin_desc *desc); 657 OMPRTL__tgt_register_lib, 658 // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); 659 OMPRTL__tgt_unregister_lib, 660 // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, 661 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 662 OMPRTL__tgt_target_data_begin, 663 // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num, 664 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 665 OMPRTL__tgt_target_data_end, 666 // Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num, 667 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 668 OMPRTL__tgt_target_data_update, 669 }; 670 671 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 672 /// region. 673 class CleanupTy final : public EHScopeStack::Cleanup { 674 PrePostActionTy *Action; 675 676 public: 677 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 678 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 679 if (!CGF.HaveInsertPoint()) 680 return; 681 Action->Exit(CGF); 682 } 683 }; 684 685 } // anonymous namespace 686 687 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 688 CodeGenFunction::RunCleanupsScope Scope(CGF); 689 if (PrePostAction) { 690 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 691 Callback(CodeGen, CGF, *PrePostAction); 692 } else { 693 PrePostActionTy Action; 694 Callback(CodeGen, CGF, Action); 695 } 696 } 697 698 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 699 return CGF.EmitLoadOfPointerLValue( 700 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 701 getThreadIDVariable()->getType()->castAs<PointerType>()); 702 } 703 704 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 
705 if (!CGF.HaveInsertPoint()) 706 return; 707 // 1.2.2 OpenMP Language Terminology 708 // Structured block - An executable statement with a single entry at the 709 // top and a single exit at the bottom. 710 // The point of exit cannot be a branch out of the structured block. 711 // longjmp() and throw() must not violate the entry/exit criteria. 712 CGF.EHStack.pushTerminate(); 713 CodeGen(CGF); 714 CGF.EHStack.popTerminate(); 715 } 716 717 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 718 CodeGenFunction &CGF) { 719 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 720 getThreadIDVariable()->getType(), 721 AlignmentSource::Decl); 722 } 723 724 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) 725 : CGM(CGM), OffloadEntriesInfoManager(CGM) { 726 IdentTy = llvm::StructType::create( 727 "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, 728 CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, 729 CGM.Int8PtrTy /* psource */, nullptr); 730 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 731 732 loadOffloadInfoMetadata(); 733 } 734 735 void CGOpenMPRuntime::clear() { 736 InternalVars.clear(); 737 } 738 739 static llvm::Function * 740 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 741 const Expr *CombinerInitializer, const VarDecl *In, 742 const VarDecl *Out, bool IsCombiner) { 743 // void .omp_combiner.(Ty *in, Ty *out); 744 auto &C = CGM.getContext(); 745 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 746 FunctionArgList Args; 747 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 748 /*Id=*/nullptr, PtrTy); 749 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 750 /*Id=*/nullptr, PtrTy); 751 Args.push_back(&OmpOutParm); 752 Args.push_back(&OmpInParm); 753 auto &FnInfo = 754 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 755 auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 756 auto *Fn = 
llvm::Function::Create( 757 FnTy, llvm::GlobalValue::InternalLinkage, 758 IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule()); 759 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); 760 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 761 CodeGenFunction CGF(CGM); 762 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 763 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 764 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); 765 CodeGenFunction::OMPPrivateScope Scope(CGF); 766 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 767 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address { 768 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 769 .getAddress(); 770 }); 771 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 772 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address { 773 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 774 .getAddress(); 775 }); 776 (void)Scope.Privatize(); 777 CGF.EmitIgnoredExpr(CombinerInitializer); 778 Scope.ForceCleanup(); 779 CGF.FinishFunction(); 780 return Fn; 781 } 782 783 void CGOpenMPRuntime::emitUserDefinedReduction( 784 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 785 if (UDRMap.count(D) > 0) 786 return; 787 auto &C = CGM.getContext(); 788 if (!In || !Out) { 789 In = &C.Idents.get("omp_in"); 790 Out = &C.Idents.get("omp_out"); 791 } 792 llvm::Function *Combiner = emitCombinerOrInitializer( 793 CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()), 794 cast<VarDecl>(D->lookup(Out).front()), 795 /*IsCombiner=*/true); 796 llvm::Function *Initializer = nullptr; 797 if (auto *Init = D->getInitializer()) { 798 if (!Priv || !Orig) { 799 Priv = &C.Idents.get("omp_priv"); 800 Orig = &C.Idents.get("omp_orig"); 801 } 802 Initializer = emitCombinerOrInitializer( 803 CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()), 804 
cast<VarDecl>(D->lookup(Priv).front()), 805 /*IsCombiner=*/false); 806 } 807 UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer))); 808 if (CGF) { 809 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 810 Decls.second.push_back(D); 811 } 812 } 813 814 std::pair<llvm::Function *, llvm::Function *> 815 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 816 auto I = UDRMap.find(D); 817 if (I != UDRMap.end()) 818 return I->second; 819 emitUserDefinedReduction(/*CGF=*/nullptr, D); 820 return UDRMap.lookup(D); 821 } 822 823 // Layout information for ident_t. 824 static CharUnits getIdentAlign(CodeGenModule &CGM) { 825 return CGM.getPointerAlign(); 826 } 827 static CharUnits getIdentSize(CodeGenModule &CGM) { 828 assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign())); 829 return CharUnits::fromQuantity(16) + CGM.getPointerSize(); 830 } 831 static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) { 832 // All the fields except the last are i32, so this works beautifully. 
833 return unsigned(Field) * CharUnits::fromQuantity(4); 834 } 835 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr, 836 IdentFieldIndex Field, 837 const llvm::Twine &Name = "") { 838 auto Offset = getOffsetOfIdentField(Field); 839 return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name); 840 } 841 842 llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( 843 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 844 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 845 assert(ThreadIDVar->getType()->isPointerType() && 846 "thread id variable must be of type kmp_int32 *"); 847 const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 848 CodeGenFunction CGF(CGM, true); 849 bool HasCancel = false; 850 if (auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 851 HasCancel = OPD->hasCancel(); 852 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 853 HasCancel = OPSD->hasCancel(); 854 else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 855 HasCancel = OPFD->hasCancel(); 856 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 857 HasCancel); 858 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 859 return CGF.GenerateOpenMPCapturedStmtFunction(*CS); 860 } 861 862 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( 863 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 864 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 865 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 866 bool Tied, unsigned &NumberOfParts) { 867 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 868 PrePostActionTy &) { 869 auto *ThreadID = getThreadID(CGF, D.getLocStart()); 870 auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart()); 871 llvm::Value *TaskArgs[] = { 872 UpLoc, ThreadID, 873 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 874 TaskTVar->getType()->castAs<PointerType>()) 875 .getPointer()}; 876 
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 877 }; 878 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 879 UntiedCodeGen); 880 CodeGen.setAction(Action); 881 assert(!ThreadIDVar->getType()->isPointerType() && 882 "thread id variable must be of type kmp_int32 for tasks"); 883 auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 884 auto *TD = dyn_cast<OMPTaskDirective>(&D); 885 CodeGenFunction CGF(CGM, true); 886 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 887 InnermostKind, 888 TD ? TD->hasCancel() : false, Action); 889 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 890 auto *Res = CGF.GenerateCapturedStmtFunction(*CS); 891 if (!Tied) 892 NumberOfParts = Action.getNumberOfParts(); 893 return Res; 894 } 895 896 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 897 CharUnits Align = getIdentAlign(CGM); 898 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); 899 if (!Entry) { 900 if (!DefaultOpenMPPSource) { 901 // Initialize default location for psource field of ident_t structure of 902 // all ident_t objects. Format is ";file;function;line;column;;". 
903 // Taken from 904 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c 905 DefaultOpenMPPSource = 906 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 907 DefaultOpenMPPSource = 908 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 909 } 910 911 ConstantInitBuilder builder(CGM); 912 auto fields = builder.beginStruct(IdentTy); 913 fields.addInt(CGM.Int32Ty, 0); 914 fields.addInt(CGM.Int32Ty, Flags); 915 fields.addInt(CGM.Int32Ty, 0); 916 fields.addInt(CGM.Int32Ty, 0); 917 fields.add(DefaultOpenMPPSource); 918 auto DefaultOpenMPLocation = 919 fields.finishAndCreateGlobal("", Align, /*isConstant*/ true, 920 llvm::GlobalValue::PrivateLinkage); 921 DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 922 923 OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation; 924 } 925 return Address(Entry, Align); 926 } 927 928 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 929 SourceLocation Loc, 930 unsigned Flags) { 931 Flags |= OMP_IDENT_KMPC; 932 // If no debug info is generated - return global default location. 933 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 934 Loc.isInvalid()) 935 return getOrCreateDefaultLocation(Flags).getPointer(); 936 937 assert(CGF.CurFn && "No function in current CodeGenFunction."); 938 939 Address LocValue = Address::invalid(); 940 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 941 if (I != OpenMPLocThreadIDMap.end()) 942 LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM)); 943 944 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 945 // GetOpenMPThreadID was called before this routine. 
946 if (!LocValue.isValid()) { 947 // Generate "ident_t .kmpc_loc.addr;" 948 Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM), 949 ".kmpc_loc.addr"); 950 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 951 Elem.second.DebugLoc = AI.getPointer(); 952 LocValue = AI; 953 954 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 955 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 956 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 957 CGM.getSize(getIdentSize(CGF.CGM))); 958 } 959 960 // char **psource = &.kmpc_loc_<flags>.addr.psource; 961 Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource); 962 963 auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 964 if (OMPDebugLoc == nullptr) { 965 SmallString<128> Buffer2; 966 llvm::raw_svector_ostream OS2(Buffer2); 967 // Build debug location 968 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 969 OS2 << ";" << PLoc.getFilename() << ";"; 970 if (const FunctionDecl *FD = 971 dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) { 972 OS2 << FD->getQualifiedNameAsString(); 973 } 974 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 975 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 976 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 977 } 978 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 979 CGF.Builder.CreateStore(OMPDebugLoc, PSource); 980 981 // Our callers always pass this to a runtime function, so for 982 // convenience, go ahead and return a naked pointer. 983 return LocValue.getPointer(); 984 } 985 986 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 987 SourceLocation Loc) { 988 assert(CGF.CurFn && "No function in current CodeGenFunction."); 989 990 llvm::Value *ThreadID = nullptr; 991 // Check whether we've already cached a load of the thread id in this 992 // function. 
993 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 994 if (I != OpenMPLocThreadIDMap.end()) { 995 ThreadID = I->second.ThreadID; 996 if (ThreadID != nullptr) 997 return ThreadID; 998 } 999 if (auto *OMPRegionInfo = 1000 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1001 if (OMPRegionInfo->getThreadIDVariable()) { 1002 // Check if this an outlined function with thread id passed as argument. 1003 auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1004 ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); 1005 // If value loaded in entry block, cache it and use it everywhere in 1006 // function. 1007 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 1008 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1009 Elem.second.ThreadID = ThreadID; 1010 } 1011 return ThreadID; 1012 } 1013 } 1014 1015 // This is not an outlined function region - need to call __kmpc_int32 1016 // kmpc_global_thread_num(ident_t *loc). 1017 // Generate thread id value and cache this value for use across the 1018 // function. 
1019 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1020 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 1021 ThreadID = 1022 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1023 emitUpdateLocation(CGF, Loc)); 1024 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1025 Elem.second.ThreadID = ThreadID; 1026 return ThreadID; 1027 } 1028 1029 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1030 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1031 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 1032 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1033 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1034 for(auto *D : FunctionUDRMap[CGF.CurFn]) { 1035 UDRMap.erase(D); 1036 } 1037 FunctionUDRMap.erase(CGF.CurFn); 1038 } 1039 } 1040 1041 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1042 if (!IdentTy) { 1043 } 1044 return llvm::PointerType::getUnqual(IdentTy); 1045 } 1046 1047 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1048 if (!Kmpc_MicroTy) { 1049 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1050 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1051 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1052 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1053 } 1054 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1055 } 1056 1057 llvm::Constant * 1058 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1059 llvm::Constant *RTLFn = nullptr; 1060 switch (static_cast<OpenMPRTLFunction>(Function)) { 1061 case OMPRTL__kmpc_fork_call: { 1062 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1063 // microtask, ...); 1064 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1065 getKmpc_MicroPointerTy()}; 1066 llvm::FunctionType *FnTy = 1067 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1068 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1069 break; 1070 } 1071 case OMPRTL__kmpc_global_thread_num: { 1072 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1073 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1074 llvm::FunctionType *FnTy = 1075 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1076 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1077 break; 1078 } 1079 case OMPRTL__kmpc_threadprivate_cached: { 1080 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1081 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1082 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1083 CGM.VoidPtrTy, CGM.SizeTy, 1084 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1085 llvm::FunctionType *FnTy = 1086 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1087 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1088 break; 1089 } 1090 case OMPRTL__kmpc_critical: { 1091 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1092 // kmp_critical_name *crit); 1093 llvm::Type *TypeParams[] = { 1094 getIdentTyPointerTy(), 
CGM.Int32Ty, 1095 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1096 llvm::FunctionType *FnTy = 1097 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1098 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1099 break; 1100 } 1101 case OMPRTL__kmpc_critical_with_hint: { 1102 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1103 // kmp_critical_name *crit, uintptr_t hint); 1104 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1105 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1106 CGM.IntPtrTy}; 1107 llvm::FunctionType *FnTy = 1108 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1109 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); 1110 break; 1111 } 1112 case OMPRTL__kmpc_threadprivate_register: { 1113 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1114 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1115 // typedef void *(*kmpc_ctor)(void *); 1116 auto KmpcCtorTy = 1117 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1118 /*isVarArg*/ false)->getPointerTo(); 1119 // typedef void *(*kmpc_cctor)(void *, void *); 1120 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1121 auto KmpcCopyCtorTy = 1122 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1123 /*isVarArg*/ false)->getPointerTo(); 1124 // typedef void (*kmpc_dtor)(void *); 1125 auto KmpcDtorTy = 1126 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1127 ->getPointerTo(); 1128 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1129 KmpcCopyCtorTy, KmpcDtorTy}; 1130 auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1131 /*isVarArg*/ false); 1132 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1133 break; 1134 } 1135 case OMPRTL__kmpc_end_critical: { 1136 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1137 // kmp_critical_name *crit); 
1138 llvm::Type *TypeParams[] = { 1139 getIdentTyPointerTy(), CGM.Int32Ty, 1140 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1141 llvm::FunctionType *FnTy = 1142 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1143 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1144 break; 1145 } 1146 case OMPRTL__kmpc_cancel_barrier: { 1147 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1148 // global_tid); 1149 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1150 llvm::FunctionType *FnTy = 1151 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1152 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1153 break; 1154 } 1155 case OMPRTL__kmpc_barrier: { 1156 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1157 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1158 llvm::FunctionType *FnTy = 1159 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1160 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1161 break; 1162 } 1163 case OMPRTL__kmpc_for_static_fini: { 1164 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1165 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1166 llvm::FunctionType *FnTy = 1167 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1168 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1169 break; 1170 } 1171 case OMPRTL__kmpc_push_num_threads: { 1172 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1173 // kmp_int32 num_threads) 1174 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1175 CGM.Int32Ty}; 1176 llvm::FunctionType *FnTy = 1177 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1178 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1179 break; 1180 } 1181 case OMPRTL__kmpc_serialized_parallel: { 1182 // Build void 
__kmpc_serialized_parallel(ident_t *loc, kmp_int32 1183 // global_tid); 1184 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1185 llvm::FunctionType *FnTy = 1186 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1187 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1188 break; 1189 } 1190 case OMPRTL__kmpc_end_serialized_parallel: { 1191 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1192 // global_tid); 1193 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1194 llvm::FunctionType *FnTy = 1195 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1196 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1197 break; 1198 } 1199 case OMPRTL__kmpc_flush: { 1200 // Build void __kmpc_flush(ident_t *loc); 1201 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1202 llvm::FunctionType *FnTy = 1203 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1204 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1205 break; 1206 } 1207 case OMPRTL__kmpc_master: { 1208 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1209 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1210 llvm::FunctionType *FnTy = 1211 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1212 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1213 break; 1214 } 1215 case OMPRTL__kmpc_end_master: { 1216 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1217 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1218 llvm::FunctionType *FnTy = 1219 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1220 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1221 break; 1222 } 1223 case OMPRTL__kmpc_omp_taskyield: { 1224 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1225 // int end_part); 1226 
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1227 llvm::FunctionType *FnTy = 1228 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1229 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1230 break; 1231 } 1232 case OMPRTL__kmpc_single: { 1233 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1234 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1235 llvm::FunctionType *FnTy = 1236 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1237 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1238 break; 1239 } 1240 case OMPRTL__kmpc_end_single: { 1241 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1242 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1243 llvm::FunctionType *FnTy = 1244 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1245 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1246 break; 1247 } 1248 case OMPRTL__kmpc_omp_task_alloc: { 1249 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1250 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1251 // kmp_routine_entry_t *task_entry); 1252 assert(KmpRoutineEntryPtrTy != nullptr && 1253 "Type kmp_routine_entry_t must be created."); 1254 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1255 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1256 // Return void * and then cast to particular kmp_task_t type. 
1257 llvm::FunctionType *FnTy = 1258 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1259 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1260 break; 1261 } 1262 case OMPRTL__kmpc_omp_task: { 1263 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1264 // *new_task); 1265 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1266 CGM.VoidPtrTy}; 1267 llvm::FunctionType *FnTy = 1268 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1269 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 1270 break; 1271 } 1272 case OMPRTL__kmpc_copyprivate: { 1273 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 1274 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 1275 // kmp_int32 didit); 1276 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1277 auto *CpyFnTy = 1278 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 1279 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 1280 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 1281 CGM.Int32Ty}; 1282 llvm::FunctionType *FnTy = 1283 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1284 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 1285 break; 1286 } 1287 case OMPRTL__kmpc_reduce: { 1288 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 1289 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 1290 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 1291 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1292 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1293 /*isVarArg=*/false); 1294 llvm::Type *TypeParams[] = { 1295 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1296 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1297 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1298 
llvm::FunctionType *FnTy = 1299 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1300 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 1301 break; 1302 } 1303 case OMPRTL__kmpc_reduce_nowait: { 1304 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 1305 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 1306 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 1307 // *lck); 1308 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1309 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1310 /*isVarArg=*/false); 1311 llvm::Type *TypeParams[] = { 1312 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1313 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1314 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1315 llvm::FunctionType *FnTy = 1316 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1317 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 1318 break; 1319 } 1320 case OMPRTL__kmpc_end_reduce: { 1321 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 1322 // kmp_critical_name *lck); 1323 llvm::Type *TypeParams[] = { 1324 getIdentTyPointerTy(), CGM.Int32Ty, 1325 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1326 llvm::FunctionType *FnTy = 1327 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1328 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 1329 break; 1330 } 1331 case OMPRTL__kmpc_end_reduce_nowait: { 1332 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 1333 // kmp_critical_name *lck); 1334 llvm::Type *TypeParams[] = { 1335 getIdentTyPointerTy(), CGM.Int32Ty, 1336 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1337 llvm::FunctionType *FnTy = 1338 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1339 RTLFn = 1340 CGM.CreateRuntimeFunction(FnTy, 
/*Name=*/"__kmpc_end_reduce_nowait"); 1341 break; 1342 } 1343 case OMPRTL__kmpc_omp_task_begin_if0: { 1344 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1345 // *new_task); 1346 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1347 CGM.VoidPtrTy}; 1348 llvm::FunctionType *FnTy = 1349 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1350 RTLFn = 1351 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 1352 break; 1353 } 1354 case OMPRTL__kmpc_omp_task_complete_if0: { 1355 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1356 // *new_task); 1357 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1358 CGM.VoidPtrTy}; 1359 llvm::FunctionType *FnTy = 1360 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1361 RTLFn = CGM.CreateRuntimeFunction(FnTy, 1362 /*Name=*/"__kmpc_omp_task_complete_if0"); 1363 break; 1364 } 1365 case OMPRTL__kmpc_ordered: { 1366 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 1367 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1368 llvm::FunctionType *FnTy = 1369 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1370 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 1371 break; 1372 } 1373 case OMPRTL__kmpc_end_ordered: { 1374 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 1375 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1376 llvm::FunctionType *FnTy = 1377 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1378 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 1379 break; 1380 } 1381 case OMPRTL__kmpc_omp_taskwait: { 1382 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 1383 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1384 llvm::FunctionType *FnTy = 1385 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1386 RTLFn = 
CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 1387 break; 1388 } 1389 case OMPRTL__kmpc_taskgroup: { 1390 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 1391 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1392 llvm::FunctionType *FnTy = 1393 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1394 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 1395 break; 1396 } 1397 case OMPRTL__kmpc_end_taskgroup: { 1398 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 1399 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1400 llvm::FunctionType *FnTy = 1401 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1402 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 1403 break; 1404 } 1405 case OMPRTL__kmpc_push_proc_bind: { 1406 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 1407 // int proc_bind) 1408 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1409 llvm::FunctionType *FnTy = 1410 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1411 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 1412 break; 1413 } 1414 case OMPRTL__kmpc_omp_task_with_deps: { 1415 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 1416 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 1417 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 1418 llvm::Type *TypeParams[] = { 1419 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 1420 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 1421 llvm::FunctionType *FnTy = 1422 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1423 RTLFn = 1424 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 1425 break; 1426 } 1427 case OMPRTL__kmpc_omp_wait_deps: { 1428 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 1429 // 
kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 1430 // kmp_depend_info_t *noalias_dep_list); 1431 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1432 CGM.Int32Ty, CGM.VoidPtrTy, 1433 CGM.Int32Ty, CGM.VoidPtrTy}; 1434 llvm::FunctionType *FnTy = 1435 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1436 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 1437 break; 1438 } 1439 case OMPRTL__kmpc_cancellationpoint: { 1440 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 1441 // global_tid, kmp_int32 cncl_kind) 1442 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1443 llvm::FunctionType *FnTy = 1444 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1445 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 1446 break; 1447 } 1448 case OMPRTL__kmpc_cancel: { 1449 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 1450 // kmp_int32 cncl_kind) 1451 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1452 llvm::FunctionType *FnTy = 1453 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1454 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 1455 break; 1456 } 1457 case OMPRTL__kmpc_push_num_teams: { 1458 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 1459 // kmp_int32 num_teams, kmp_int32 num_threads) 1460 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1461 CGM.Int32Ty}; 1462 llvm::FunctionType *FnTy = 1463 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1464 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 1465 break; 1466 } 1467 case OMPRTL__kmpc_fork_teams: { 1468 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 1469 // microtask, ...); 1470 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1471 
getKmpc_MicroPointerTy()}; 1472 llvm::FunctionType *FnTy = 1473 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1474 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 1475 break; 1476 } 1477 case OMPRTL__kmpc_taskloop: { 1478 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 1479 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 1480 // sched, kmp_uint64 grainsize, void *task_dup); 1481 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 1482 CGM.IntTy, 1483 CGM.VoidPtrTy, 1484 CGM.IntTy, 1485 CGM.Int64Ty->getPointerTo(), 1486 CGM.Int64Ty->getPointerTo(), 1487 CGM.Int64Ty, 1488 CGM.IntTy, 1489 CGM.IntTy, 1490 CGM.Int64Ty, 1491 CGM.VoidPtrTy}; 1492 llvm::FunctionType *FnTy = 1493 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1494 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 1495 break; 1496 } 1497 case OMPRTL__kmpc_doacross_init: { 1498 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 1499 // num_dims, struct kmp_dim *dims); 1500 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 1501 CGM.Int32Ty, 1502 CGM.Int32Ty, 1503 CGM.VoidPtrTy}; 1504 llvm::FunctionType *FnTy = 1505 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1506 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 1507 break; 1508 } 1509 case OMPRTL__kmpc_doacross_fini: { 1510 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 1511 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1512 llvm::FunctionType *FnTy = 1513 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1514 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 1515 break; 1516 } 1517 case OMPRTL__kmpc_doacross_post: { 1518 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 1519 // *vec); 1520 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 
1521 CGM.Int64Ty->getPointerTo()}; 1522 llvm::FunctionType *FnTy = 1523 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1524 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 1525 break; 1526 } 1527 case OMPRTL__kmpc_doacross_wait: { 1528 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 1529 // *vec); 1530 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1531 CGM.Int64Ty->getPointerTo()}; 1532 llvm::FunctionType *FnTy = 1533 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1534 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 1535 break; 1536 } 1537 case OMPRTL__tgt_target: { 1538 // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t 1539 // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t 1540 // *arg_types); 1541 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1542 CGM.VoidPtrTy, 1543 CGM.Int32Ty, 1544 CGM.VoidPtrPtrTy, 1545 CGM.VoidPtrPtrTy, 1546 CGM.SizeTy->getPointerTo(), 1547 CGM.Int32Ty->getPointerTo()}; 1548 llvm::FunctionType *FnTy = 1549 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1550 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 1551 break; 1552 } 1553 case OMPRTL__tgt_target_teams: { 1554 // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, 1555 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 1556 // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); 1557 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1558 CGM.VoidPtrTy, 1559 CGM.Int32Ty, 1560 CGM.VoidPtrPtrTy, 1561 CGM.VoidPtrPtrTy, 1562 CGM.SizeTy->getPointerTo(), 1563 CGM.Int32Ty->getPointerTo(), 1564 CGM.Int32Ty, 1565 CGM.Int32Ty}; 1566 llvm::FunctionType *FnTy = 1567 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1568 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 1569 break; 1570 } 1571 case OMPRTL__tgt_register_lib: { 
1572 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 1573 QualType ParamTy = 1574 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 1575 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 1576 llvm::FunctionType *FnTy = 1577 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1578 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 1579 break; 1580 } 1581 case OMPRTL__tgt_unregister_lib: { 1582 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 1583 QualType ParamTy = 1584 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 1585 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 1586 llvm::FunctionType *FnTy = 1587 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1588 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 1589 break; 1590 } 1591 case OMPRTL__tgt_target_data_begin: { 1592 // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, 1593 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 1594 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1595 CGM.Int32Ty, 1596 CGM.VoidPtrPtrTy, 1597 CGM.VoidPtrPtrTy, 1598 CGM.SizeTy->getPointerTo(), 1599 CGM.Int32Ty->getPointerTo()}; 1600 llvm::FunctionType *FnTy = 1601 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1602 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 1603 break; 1604 } 1605 case OMPRTL__tgt_target_data_end: { 1606 // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num, 1607 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 1608 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1609 CGM.Int32Ty, 1610 CGM.VoidPtrPtrTy, 1611 CGM.VoidPtrPtrTy, 1612 CGM.SizeTy->getPointerTo(), 1613 CGM.Int32Ty->getPointerTo()}; 1614 llvm::FunctionType *FnTy = 1615 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1616 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__tgt_target_data_end"); 1617 break; 1618 } 1619 case OMPRTL__tgt_target_data_update: { 1620 // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num, 1621 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 1622 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1623 CGM.Int32Ty, 1624 CGM.VoidPtrPtrTy, 1625 CGM.VoidPtrPtrTy, 1626 CGM.SizeTy->getPointerTo(), 1627 CGM.Int32Ty->getPointerTo()}; 1628 llvm::FunctionType *FnTy = 1629 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1630 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 1631 break; 1632 } 1633 } 1634 assert(RTLFn && "Unable to find OpenMP runtime function"); 1635 return RTLFn; 1636 } 1637 1638 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, 1639 bool IVSigned) { 1640 assert((IVSize == 32 || IVSize == 64) && 1641 "IV size is not compatible with the omp runtime"); 1642 auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1643 : "__kmpc_for_static_init_4u") 1644 : (IVSigned ? "__kmpc_for_static_init_8" 1645 : "__kmpc_for_static_init_8u"); 1646 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1647 auto PtrTy = llvm::PointerType::getUnqual(ITy); 1648 llvm::Type *TypeParams[] = { 1649 getIdentTyPointerTy(), // loc 1650 CGM.Int32Ty, // tid 1651 CGM.Int32Ty, // schedtype 1652 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1653 PtrTy, // p_lower 1654 PtrTy, // p_upper 1655 PtrTy, // p_stride 1656 ITy, // incr 1657 ITy // chunk 1658 }; 1659 llvm::FunctionType *FnTy = 1660 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1661 return CGM.CreateRuntimeFunction(FnTy, Name); 1662 } 1663 1664 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, 1665 bool IVSigned) { 1666 assert((IVSize == 32 || IVSize == 64) && 1667 "IV size is not compatible with the omp runtime"); 1668 auto Name = 1669 IVSize == 32 1670 ? (IVSigned ? 
"__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1671 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1672 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1673 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1674 CGM.Int32Ty, // tid 1675 CGM.Int32Ty, // schedtype 1676 ITy, // lower 1677 ITy, // upper 1678 ITy, // stride 1679 ITy // chunk 1680 }; 1681 llvm::FunctionType *FnTy = 1682 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1683 return CGM.CreateRuntimeFunction(FnTy, Name); 1684 } 1685 1686 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, 1687 bool IVSigned) { 1688 assert((IVSize == 32 || IVSize == 64) && 1689 "IV size is not compatible with the omp runtime"); 1690 auto Name = 1691 IVSize == 32 1692 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1693 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1694 llvm::Type *TypeParams[] = { 1695 getIdentTyPointerTy(), // loc 1696 CGM.Int32Ty, // tid 1697 }; 1698 llvm::FunctionType *FnTy = 1699 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1700 return CGM.CreateRuntimeFunction(FnTy, Name); 1701 } 1702 1703 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, 1704 bool IVSigned) { 1705 assert((IVSize == 32 || IVSize == 64) && 1706 "IV size is not compatible with the omp runtime"); 1707 auto Name = 1708 IVSize == 32 1709 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1710 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1711 auto ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1712 auto PtrTy = llvm::PointerType::getUnqual(ITy); 1713 llvm::Type *TypeParams[] = { 1714 getIdentTyPointerTy(), // loc 1715 CGM.Int32Ty, // tid 1716 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1717 PtrTy, // p_lower 1718 PtrTy, // p_upper 1719 PtrTy // p_stride 1720 }; 1721 llvm::FunctionType *FnTy = 1722 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1723 return CGM.CreateRuntimeFunction(FnTy, Name); 1724 } 1725 1726 llvm::Constant * 1727 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1728 assert(!CGM.getLangOpts().OpenMPUseTLS || 1729 !CGM.getContext().getTargetInfo().isTLSSupported()); 1730 // Lookup the entry, lazily creating it if necessary. 1731 return getOrCreateInternalVariable(CGM.Int8PtrPtrTy, 1732 Twine(CGM.getMangledName(VD)) + ".cache."); 1733 } 1734 1735 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1736 const VarDecl *VD, 1737 Address VDAddr, 1738 SourceLocation Loc) { 1739 if (CGM.getLangOpts().OpenMPUseTLS && 1740 CGM.getContext().getTargetInfo().isTLSSupported()) 1741 return VDAddr; 1742 1743 auto VarTy = VDAddr.getElementType(); 1744 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1745 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1746 CGM.Int8PtrTy), 1747 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1748 getOrCreateThreadPrivateCache(VD)}; 1749 return Address(CGF.EmitRuntimeCall( 1750 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 1751 VDAddr.getAlignment()); 1752 } 1753 1754 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1755 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1756 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1757 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1758 // library. 
1759 auto OMPLoc = emitUpdateLocation(CGF, Loc); 1760 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1761 OMPLoc); 1762 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1763 // to register constructor/destructor for variable. 1764 llvm::Value *Args[] = {OMPLoc, 1765 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1766 CGM.VoidPtrTy), 1767 Ctor, CopyCtor, Dtor}; 1768 CGF.EmitRuntimeCall( 1769 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 1770 } 1771 1772 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1773 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1774 bool PerformInit, CodeGenFunction *CGF) { 1775 if (CGM.getLangOpts().OpenMPUseTLS && 1776 CGM.getContext().getTargetInfo().isTLSSupported()) 1777 return nullptr; 1778 1779 VD = VD->getDefinition(CGM.getContext()); 1780 if (VD && ThreadPrivateWithDefinition.count(VD) == 0) { 1781 ThreadPrivateWithDefinition.insert(VD); 1782 QualType ASTTy = VD->getType(); 1783 1784 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1785 auto Init = VD->getAnyInitializer(); 1786 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1787 // Generate function that re-emits the declaration's initializer into the 1788 // threadprivate copy of the variable VD 1789 CodeGenFunction CtorCGF(CGM); 1790 FunctionArgList Args; 1791 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 1792 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 1793 Args.push_back(&Dst); 1794 1795 auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1796 CGM.getContext().VoidPtrTy, Args); 1797 auto FTy = CGM.getTypes().GetFunctionType(FI); 1798 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 1799 FTy, ".__kmpc_global_ctor_.", FI, Loc); 1800 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1801 Args, SourceLocation()); 1802 auto ArgVal = CtorCGF.EmitLoadOfScalar( 1803 CtorCGF.GetAddrOfLocalVar(&Dst), 
/*Volatile=*/false, 1804 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1805 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1806 Arg = CtorCGF.Builder.CreateElementBitCast(Arg, 1807 CtorCGF.ConvertTypeForMem(ASTTy)); 1808 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1809 /*IsInitializer=*/true); 1810 ArgVal = CtorCGF.EmitLoadOfScalar( 1811 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1812 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1813 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1814 CtorCGF.FinishFunction(); 1815 Ctor = Fn; 1816 } 1817 if (VD->getType().isDestructedType() != QualType::DK_none) { 1818 // Generate function that emits destructor call for the threadprivate copy 1819 // of the variable VD 1820 CodeGenFunction DtorCGF(CGM); 1821 FunctionArgList Args; 1822 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 1823 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 1824 Args.push_back(&Dst); 1825 1826 auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1827 CGM.getContext().VoidTy, Args); 1828 auto FTy = CGM.getTypes().GetFunctionType(FI); 1829 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 1830 FTy, ".__kmpc_global_dtor_.", FI, Loc); 1831 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1832 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1833 SourceLocation()); 1834 // Create a scope with an artificial location for the body of this function. 
1835 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1836 auto ArgVal = DtorCGF.EmitLoadOfScalar( 1837 DtorCGF.GetAddrOfLocalVar(&Dst), 1838 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1839 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 1840 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1841 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1842 DtorCGF.FinishFunction(); 1843 Dtor = Fn; 1844 } 1845 // Do not emit init function if it is not required. 1846 if (!Ctor && !Dtor) 1847 return nullptr; 1848 1849 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1850 auto CopyCtorTy = 1851 llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1852 /*isVarArg=*/false)->getPointerTo(); 1853 // Copying constructor for the threadprivate variable. 1854 // Must be NULL - reserved by runtime, but currently it requires that this 1855 // parameter is always NULL. Otherwise it fires assertion. 1856 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1857 if (Ctor == nullptr) { 1858 auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1859 /*isVarArg=*/false)->getPointerTo(); 1860 Ctor = llvm::Constant::getNullValue(CtorTy); 1861 } 1862 if (Dtor == nullptr) { 1863 auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1864 /*isVarArg=*/false)->getPointerTo(); 1865 Dtor = llvm::Constant::getNullValue(DtorTy); 1866 } 1867 if (!CGF) { 1868 auto InitFunctionTy = 1869 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1870 auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( 1871 InitFunctionTy, ".__omp_threadprivate_init_.", 1872 CGM.getTypes().arrangeNullaryFunction()); 1873 CodeGenFunction InitCGF(CGM); 1874 FunctionArgList ArgList; 1875 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1876 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1877 Loc); 1878 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1879 InitCGF.FinishFunction(); 
1880 return InitFunction; 1881 } 1882 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1883 } 1884 return nullptr; 1885 } 1886 1887 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen 1888 /// function. Here is the logic: 1889 /// if (Cond) { 1890 /// ThenGen(); 1891 /// } else { 1892 /// ElseGen(); 1893 /// } 1894 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 1895 const RegionCodeGenTy &ThenGen, 1896 const RegionCodeGenTy &ElseGen) { 1897 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 1898 1899 // If the condition constant folds and can be elided, try to avoid emitting 1900 // the condition and the dead arm of the if/else. 1901 bool CondConstant; 1902 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 1903 if (CondConstant) 1904 ThenGen(CGF); 1905 else 1906 ElseGen(CGF); 1907 return; 1908 } 1909 1910 // Otherwise, the condition did not fold, or we couldn't elide it. Just 1911 // emit the conditional branch. 1912 auto ThenBlock = CGF.createBasicBlock("omp_if.then"); 1913 auto ElseBlock = CGF.createBasicBlock("omp_if.else"); 1914 auto ContBlock = CGF.createBasicBlock("omp_if.end"); 1915 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 1916 1917 // Emit the 'then' code. 1918 CGF.EmitBlock(ThenBlock); 1919 ThenGen(CGF); 1920 CGF.EmitBranch(ContBlock); 1921 // Emit the 'else' code if present. 1922 // There is no need to emit line number for unconditional branch. 1923 (void)ApplyDebugLocation::CreateEmpty(CGF); 1924 CGF.EmitBlock(ElseBlock); 1925 ElseGen(CGF); 1926 // There is no need to emit line number for unconditional branch. 1927 (void)ApplyDebugLocation::CreateEmpty(CGF); 1928 CGF.EmitBranch(ContBlock); 1929 // Emit the continuation block for code after the if. 
1930 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 1931 } 1932 1933 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 1934 llvm::Value *OutlinedFn, 1935 ArrayRef<llvm::Value *> CapturedVars, 1936 const Expr *IfCond) { 1937 if (!CGF.HaveInsertPoint()) 1938 return; 1939 auto *RTLoc = emitUpdateLocation(CGF, Loc); 1940 auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, 1941 PrePostActionTy &) { 1942 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 1943 auto &RT = CGF.CGM.getOpenMPRuntime(); 1944 llvm::Value *Args[] = { 1945 RTLoc, 1946 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 1947 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 1948 llvm::SmallVector<llvm::Value *, 16> RealArgs; 1949 RealArgs.append(std::begin(Args), std::end(Args)); 1950 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 1951 1952 auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); 1953 CGF.EmitRuntimeCall(RTLFn, RealArgs); 1954 }; 1955 auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, 1956 PrePostActionTy &) { 1957 auto &RT = CGF.CGM.getOpenMPRuntime(); 1958 auto ThreadID = RT.getThreadID(CGF, Loc); 1959 // Build calls: 1960 // __kmpc_serialized_parallel(&Loc, GTid); 1961 llvm::Value *Args[] = {RTLoc, ThreadID}; 1962 CGF.EmitRuntimeCall( 1963 RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); 1964 1965 // OutlinedFn(>id, &zero, CapturedStruct); 1966 auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 1967 Address ZeroAddr = 1968 CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), 1969 /*Name*/ ".zero.addr"); 1970 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 1971 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 1972 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 1973 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 1974 OutlinedFnArgs.append(CapturedVars.begin(), 
CapturedVars.end()); 1975 CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); 1976 1977 // __kmpc_end_serialized_parallel(&Loc, GTid); 1978 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 1979 CGF.EmitRuntimeCall( 1980 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 1981 EndArgs); 1982 }; 1983 if (IfCond) 1984 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 1985 else { 1986 RegionCodeGenTy ThenRCG(ThenGen); 1987 ThenRCG(CGF); 1988 } 1989 } 1990 1991 // If we're inside an (outlined) parallel region, use the region info's 1992 // thread-ID variable (it is passed in a first argument of the outlined function 1993 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 1994 // regular serial code region, get thread ID by calling kmp_int32 1995 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 1996 // return the address of that temp. 1997 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 1998 SourceLocation Loc) { 1999 if (auto *OMPRegionInfo = 2000 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2001 if (OMPRegionInfo->getThreadIDVariable()) 2002 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 2003 2004 auto ThreadID = getThreadID(CGF, Loc); 2005 auto Int32Ty = 2006 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2007 auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2008 CGF.EmitStoreOfScalar(ThreadID, 2009 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2010 2011 return ThreadIDTemp; 2012 } 2013 2014 llvm::Constant * 2015 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, 2016 const llvm::Twine &Name) { 2017 SmallString<256> Buffer; 2018 llvm::raw_svector_ostream Out(Buffer); 2019 Out << Name; 2020 auto RuntimeName = Out.str(); 2021 auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first; 2022 if (Elem.second) { 2023 
assert(Elem.second->getType()->getPointerElementType() == Ty && 2024 "OMP internal variable has different type than requested"); 2025 return &*Elem.second; 2026 } 2027 2028 return Elem.second = new llvm::GlobalVariable( 2029 CGM.getModule(), Ty, /*IsConstant*/ false, 2030 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2031 Elem.first()); 2032 } 2033 2034 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2035 llvm::Twine Name(".gomp_critical_user_", CriticalName); 2036 return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); 2037 } 2038 2039 namespace { 2040 /// Common pre(post)-action for different OpenMP constructs. 2041 class CommonActionTy final : public PrePostActionTy { 2042 llvm::Value *EnterCallee; 2043 ArrayRef<llvm::Value *> EnterArgs; 2044 llvm::Value *ExitCallee; 2045 ArrayRef<llvm::Value *> ExitArgs; 2046 bool Conditional; 2047 llvm::BasicBlock *ContBlock = nullptr; 2048 2049 public: 2050 CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs, 2051 llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs, 2052 bool Conditional = false) 2053 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2054 ExitArgs(ExitArgs), Conditional(Conditional) {} 2055 void Enter(CodeGenFunction &CGF) override { 2056 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2057 if (Conditional) { 2058 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2059 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2060 ContBlock = CGF.createBasicBlock("omp_if.end"); 2061 // Generate the branch (If-stmt) 2062 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2063 CGF.EmitBlock(ThenBlock); 2064 } 2065 } 2066 void Done(CodeGenFunction &CGF) { 2067 // Emit the rest of blocks/branches 2068 CGF.EmitBranch(ContBlock); 2069 CGF.EmitBlock(ContBlock, true); 2070 } 2071 void Exit(CodeGenFunction &CGF) override { 2072 CGF.EmitRuntimeCall(ExitCallee, 
ExitArgs); 2073 } 2074 }; 2075 } // anonymous namespace 2076 2077 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2078 StringRef CriticalName, 2079 const RegionCodeGenTy &CriticalOpGen, 2080 SourceLocation Loc, const Expr *Hint) { 2081 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2082 // CriticalOpGen(); 2083 // __kmpc_end_critical(ident_t *, gtid, Lock); 2084 // Prepare arguments and build a call to __kmpc_critical 2085 if (!CGF.HaveInsertPoint()) 2086 return; 2087 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2088 getCriticalRegionLock(CriticalName)}; 2089 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2090 std::end(Args)); 2091 if (Hint) { 2092 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2093 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); 2094 } 2095 CommonActionTy Action( 2096 createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint 2097 : OMPRTL__kmpc_critical), 2098 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); 2099 CriticalOpGen.setAction(Action); 2100 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2101 } 2102 2103 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2104 const RegionCodeGenTy &MasterOpGen, 2105 SourceLocation Loc) { 2106 if (!CGF.HaveInsertPoint()) 2107 return; 2108 // if(__kmpc_master(ident_t *, gtid)) { 2109 // MasterOpGen(); 2110 // __kmpc_end_master(ident_t *, gtid); 2111 // } 2112 // Prepare arguments and build a call to __kmpc_master 2113 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2114 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, 2115 createRuntimeFunction(OMPRTL__kmpc_end_master), Args, 2116 /*Conditional=*/true); 2117 MasterOpGen.setAction(Action); 2118 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2119 Action.Done(CGF); 2120 } 2121 2122 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2123 SourceLocation Loc) { 
2124 if (!CGF.HaveInsertPoint()) 2125 return; 2126 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2127 llvm::Value *Args[] = { 2128 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2129 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2130 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 2131 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2132 Region->emitUntiedSwitch(CGF); 2133 } 2134 2135 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2136 const RegionCodeGenTy &TaskgroupOpGen, 2137 SourceLocation Loc) { 2138 if (!CGF.HaveInsertPoint()) 2139 return; 2140 // __kmpc_taskgroup(ident_t *, gtid); 2141 // TaskgroupOpGen(); 2142 // __kmpc_end_taskgroup(ident_t *, gtid); 2143 // Prepare arguments and build a call to __kmpc_taskgroup 2144 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2145 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, 2146 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 2147 Args); 2148 TaskgroupOpGen.setAction(Action); 2149 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2150 } 2151 2152 /// Given an array of pointers to variables, project the address of a 2153 /// given variable. 2154 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2155 unsigned Index, const VarDecl *Var) { 2156 // Pull out the pointer to the variable. 
2157 Address PtrAddr = 2158 CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize()); 2159 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2160 2161 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 2162 Addr = CGF.Builder.CreateElementBitCast( 2163 Addr, CGF.ConvertTypeForMem(Var->getType())); 2164 return Addr; 2165 } 2166 2167 static llvm::Value *emitCopyprivateCopyFunction( 2168 CodeGenModule &CGM, llvm::Type *ArgsType, 2169 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2170 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) { 2171 auto &C = CGM.getContext(); 2172 // void copy_func(void *LHSArg, void *RHSArg); 2173 FunctionArgList Args; 2174 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 2175 C.VoidPtrTy); 2176 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 2177 C.VoidPtrTy); 2178 Args.push_back(&LHSArg); 2179 Args.push_back(&RHSArg); 2180 auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2181 auto *Fn = llvm::Function::Create( 2182 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 2183 ".omp.copyprivate.copy_func", &CGM.getModule()); 2184 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); 2185 CodeGenFunction CGF(CGM); 2186 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 2187 // Dest = (void*[n])(LHSArg); 2188 // Src = (void*[n])(RHSArg); 2189 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2190 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2191 ArgsType), CGF.getPointerAlign()); 2192 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2193 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2194 ArgsType), CGF.getPointerAlign()); 2195 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 2196 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2197 // ... 
2198 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2199 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2200 auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2201 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2202 2203 auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2204 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2205 2206 auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2207 QualType Type = VD->getType(); 2208 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2209 } 2210 CGF.FinishFunction(); 2211 return Fn; 2212 } 2213 2214 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2215 const RegionCodeGenTy &SingleOpGen, 2216 SourceLocation Loc, 2217 ArrayRef<const Expr *> CopyprivateVars, 2218 ArrayRef<const Expr *> SrcExprs, 2219 ArrayRef<const Expr *> DstExprs, 2220 ArrayRef<const Expr *> AssignmentOps) { 2221 if (!CGF.HaveInsertPoint()) 2222 return; 2223 assert(CopyprivateVars.size() == SrcExprs.size() && 2224 CopyprivateVars.size() == DstExprs.size() && 2225 CopyprivateVars.size() == AssignmentOps.size()); 2226 auto &C = CGM.getContext(); 2227 // int32 did_it = 0; 2228 // if(__kmpc_single(ident_t *, gtid)) { 2229 // SingleOpGen(); 2230 // __kmpc_end_single(ident_t *, gtid); 2231 // did_it = 1; 2232 // } 2233 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2234 // <copy_func>, did_it); 2235 2236 Address DidIt = Address::invalid(); 2237 if (!CopyprivateVars.empty()) { 2238 // int32 did_it = 0; 2239 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2240 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2241 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2242 } 2243 // Prepare arguments and build a call to __kmpc_single 2244 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2245 CommonActionTy 
Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, 2246 createRuntimeFunction(OMPRTL__kmpc_end_single), Args, 2247 /*Conditional=*/true); 2248 SingleOpGen.setAction(Action); 2249 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2250 if (DidIt.isValid()) { 2251 // did_it = 1; 2252 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2253 } 2254 Action.Done(CGF); 2255 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2256 // <copy_func>, did_it); 2257 if (DidIt.isValid()) { 2258 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2259 auto CopyprivateArrayTy = 2260 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 2261 /*IndexTypeQuals=*/0); 2262 // Create a list of all private variables for copyprivate. 2263 Address CopyprivateList = 2264 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2265 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2266 Address Elem = CGF.Builder.CreateConstArrayGEP( 2267 CopyprivateList, I, CGF.getPointerSize()); 2268 CGF.Builder.CreateStore( 2269 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2270 CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), 2271 Elem); 2272 } 2273 // Build function that copies private values from single region to all other 2274 // threads in the corresponding parallel region. 
2275 auto *CpyFn = emitCopyprivateCopyFunction( 2276 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 2277 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps); 2278 auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2279 Address CL = 2280 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 2281 CGF.VoidPtrTy); 2282 auto *DidItVal = CGF.Builder.CreateLoad(DidIt); 2283 llvm::Value *Args[] = { 2284 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2285 getThreadID(CGF, Loc), // i32 <gtid> 2286 BufSize, // size_t <buf_size> 2287 CL.getPointer(), // void *<copyprivate list> 2288 CpyFn, // void (*) (void *, void *) <copy_func> 2289 DidItVal // i32 did_it 2290 }; 2291 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 2292 } 2293 } 2294 2295 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2296 const RegionCodeGenTy &OrderedOpGen, 2297 SourceLocation Loc, bool IsThreads) { 2298 if (!CGF.HaveInsertPoint()) 2299 return; 2300 // __kmpc_ordered(ident_t *, gtid); 2301 // OrderedOpGen(); 2302 // __kmpc_end_ordered(ident_t *, gtid); 2303 // Prepare arguments and build a call to __kmpc_ordered 2304 if (IsThreads) { 2305 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2306 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, 2307 createRuntimeFunction(OMPRTL__kmpc_end_ordered), 2308 Args); 2309 OrderedOpGen.setAction(Action); 2310 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2311 return; 2312 } 2313 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2314 } 2315 2316 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2317 OpenMPDirectiveKind Kind, bool EmitChecks, 2318 bool ForceSimpleCall) { 2319 if (!CGF.HaveInsertPoint()) 2320 return; 2321 // Build call __kmpc_cancel_barrier(loc, thread_id); 2322 // Build call __kmpc_barrier(loc, thread_id); 2323 unsigned Flags; 2324 if (Kind == OMPD_for) 2325 Flags = 
OMP_IDENT_BARRIER_IMPL_FOR; 2326 else if (Kind == OMPD_sections) 2327 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2328 else if (Kind == OMPD_single) 2329 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2330 else if (Kind == OMPD_barrier) 2331 Flags = OMP_IDENT_BARRIER_EXPL; 2332 else 2333 Flags = OMP_IDENT_BARRIER_IMPL; 2334 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2335 // thread_id); 2336 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2337 getThreadID(CGF, Loc)}; 2338 if (auto *OMPRegionInfo = 2339 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 2340 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2341 auto *Result = CGF.EmitRuntimeCall( 2342 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 2343 if (EmitChecks) { 2344 // if (__kmpc_cancel_barrier()) { 2345 // exit from construct; 2346 // } 2347 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2348 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 2349 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 2350 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2351 CGF.EmitBlock(ExitBB); 2352 // exit from construct; 2353 auto CancelDestination = 2354 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2355 CGF.EmitBranchThroughCleanup(CancelDestination); 2356 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2357 } 2358 return; 2359 } 2360 } 2361 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 2362 } 2363 2364 /// \brief Map the OpenMP loop schedule to the runtime enumeration. 2365 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2366 bool Chunked, bool Ordered) { 2367 switch (ScheduleKind) { 2368 case OMPC_SCHEDULE_static: 2369 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2370 : (Ordered ? OMP_ord_static : OMP_sch_static); 2371 case OMPC_SCHEDULE_dynamic: 2372 return Ordered ? 
OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2373 case OMPC_SCHEDULE_guided: 2374 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2375 case OMPC_SCHEDULE_runtime: 2376 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2377 case OMPC_SCHEDULE_auto: 2378 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2379 case OMPC_SCHEDULE_unknown: 2380 assert(!Chunked && "chunk was specified but schedule kind not known"); 2381 return Ordered ? OMP_ord_static : OMP_sch_static; 2382 } 2383 llvm_unreachable("Unexpected runtime schedule"); 2384 } 2385 2386 /// \brief Map the OpenMP distribute schedule to the runtime enumeration. 2387 static OpenMPSchedType 2388 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2389 // only static is allowed for dist_schedule 2390 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2391 } 2392 2393 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2394 bool Chunked) const { 2395 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2396 return Schedule == OMP_sch_static; 2397 } 2398 2399 bool CGOpenMPRuntime::isStaticNonchunked( 2400 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2401 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2402 return Schedule == OMP_dist_sch_static; 2403 } 2404 2405 2406 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2407 auto Schedule = 2408 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2409 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2410 return Schedule != OMP_sch_static; 2411 } 2412 2413 static int addMonoNonMonoModifier(OpenMPSchedType Schedule, 2414 OpenMPScheduleClauseModifier M1, 2415 OpenMPScheduleClauseModifier M2) { 2416 int Modifier = 0; 2417 switch (M1) { 2418 case OMPC_SCHEDULE_MODIFIER_monotonic: 2419 Modifier = OMP_sch_modifier_monotonic; 2420 break; 2421 case 
OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2422 Modifier = OMP_sch_modifier_nonmonotonic; 2423 break; 2424 case OMPC_SCHEDULE_MODIFIER_simd: 2425 if (Schedule == OMP_sch_static_chunked) 2426 Schedule = OMP_sch_static_balanced_chunked; 2427 break; 2428 case OMPC_SCHEDULE_MODIFIER_last: 2429 case OMPC_SCHEDULE_MODIFIER_unknown: 2430 break; 2431 } 2432 switch (M2) { 2433 case OMPC_SCHEDULE_MODIFIER_monotonic: 2434 Modifier = OMP_sch_modifier_monotonic; 2435 break; 2436 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2437 Modifier = OMP_sch_modifier_nonmonotonic; 2438 break; 2439 case OMPC_SCHEDULE_MODIFIER_simd: 2440 if (Schedule == OMP_sch_static_chunked) 2441 Schedule = OMP_sch_static_balanced_chunked; 2442 break; 2443 case OMPC_SCHEDULE_MODIFIER_last: 2444 case OMPC_SCHEDULE_MODIFIER_unknown: 2445 break; 2446 } 2447 return Schedule | Modifier; 2448 } 2449 2450 void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF, 2451 SourceLocation Loc, 2452 const OpenMPScheduleTy &ScheduleKind, 2453 unsigned IVSize, bool IVSigned, 2454 bool Ordered, llvm::Value *UB, 2455 llvm::Value *Chunk) { 2456 if (!CGF.HaveInsertPoint()) 2457 return; 2458 OpenMPSchedType Schedule = 2459 getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); 2460 assert(Ordered || 2461 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2462 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2463 Schedule != OMP_sch_static_balanced_chunked)); 2464 // Call __kmpc_dispatch_init( 2465 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2466 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2467 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2468 2469 // If the Chunk was not specified in the clause - use default value 1. 
2470 if (Chunk == nullptr) 2471 Chunk = CGF.Builder.getIntN(IVSize, 1); 2472 llvm::Value *Args[] = { 2473 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2474 CGF.Builder.getInt32(addMonoNonMonoModifier( 2475 Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2476 CGF.Builder.getIntN(IVSize, 0), // Lower 2477 UB, // Upper 2478 CGF.Builder.getIntN(IVSize, 1), // Stride 2479 Chunk // Chunk 2480 }; 2481 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2482 } 2483 2484 static void emitForStaticInitCall( 2485 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2486 llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule, 2487 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2488 unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB, 2489 Address ST, llvm::Value *Chunk) { 2490 if (!CGF.HaveInsertPoint()) 2491 return; 2492 2493 assert(!Ordered); 2494 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2495 Schedule == OMP_sch_static_balanced_chunked || 2496 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2497 Schedule == OMP_dist_sch_static || 2498 Schedule == OMP_dist_sch_static_chunked); 2499 2500 // Call __kmpc_for_static_init( 2501 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2502 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2503 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2504 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2505 if (Chunk == nullptr) { 2506 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2507 Schedule == OMP_dist_sch_static) && 2508 "expected static non-chunked schedule"); 2509 // If the Chunk was not specified in the clause - use default value 1. 
2510 Chunk = CGF.Builder.getIntN(IVSize, 1); 2511 } else { 2512 assert((Schedule == OMP_sch_static_chunked || 2513 Schedule == OMP_sch_static_balanced_chunked || 2514 Schedule == OMP_ord_static_chunked || 2515 Schedule == OMP_dist_sch_static_chunked) && 2516 "expected static chunked schedule"); 2517 } 2518 llvm::Value *Args[] = { 2519 UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier( 2520 Schedule, M1, M2)), // Schedule type 2521 IL.getPointer(), // &isLastIter 2522 LB.getPointer(), // &LB 2523 UB.getPointer(), // &UB 2524 ST.getPointer(), // &Stride 2525 CGF.Builder.getIntN(IVSize, 1), // Incr 2526 Chunk // Chunk 2527 }; 2528 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2529 } 2530 2531 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2532 SourceLocation Loc, 2533 const OpenMPScheduleTy &ScheduleKind, 2534 unsigned IVSize, bool IVSigned, 2535 bool Ordered, Address IL, Address LB, 2536 Address UB, Address ST, 2537 llvm::Value *Chunk) { 2538 OpenMPSchedType ScheduleNum = 2539 getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); 2540 auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); 2541 auto *ThreadId = getThreadID(CGF, Loc); 2542 auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); 2543 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2544 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize, 2545 Ordered, IL, LB, UB, ST, Chunk); 2546 } 2547 2548 void CGOpenMPRuntime::emitDistributeStaticInit( 2549 CodeGenFunction &CGF, SourceLocation Loc, 2550 OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned, 2551 bool Ordered, Address IL, Address LB, Address UB, Address ST, 2552 llvm::Value *Chunk) { 2553 OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr); 2554 auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); 2555 auto *ThreadId = getThreadID(CGF, Loc); 2556 auto *StaticInitFunction = createForStaticInitFunction(IVSize, 
IVSigned); 2557 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2558 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2559 OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB, 2560 UB, ST, Chunk); 2561 } 2562 2563 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2564 SourceLocation Loc) { 2565 if (!CGF.HaveInsertPoint()) 2566 return; 2567 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2568 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2569 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 2570 Args); 2571 } 2572 2573 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2574 SourceLocation Loc, 2575 unsigned IVSize, 2576 bool IVSigned) { 2577 if (!CGF.HaveInsertPoint()) 2578 return; 2579 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2580 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2581 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2582 } 2583 2584 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2585 SourceLocation Loc, unsigned IVSize, 2586 bool IVSigned, Address IL, 2587 Address LB, Address UB, 2588 Address ST) { 2589 // Call __kmpc_dispatch_next( 2590 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2591 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2592 // kmp_int[32|64] *p_stride); 2593 llvm::Value *Args[] = { 2594 emitUpdateLocation(CGF, Loc), 2595 getThreadID(CGF, Loc), 2596 IL.getPointer(), // &isLastIter 2597 LB.getPointer(), // &Lower 2598 UB.getPointer(), // &Upper 2599 ST.getPointer() // &Stride 2600 }; 2601 llvm::Value *Call = 2602 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2603 return CGF.EmitScalarConversion( 2604 Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true), 2605 CGF.getContext().BoolTy, Loc); 2606 } 2607 2608 void 
CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2609 llvm::Value *NumThreads, 2610 SourceLocation Loc) { 2611 if (!CGF.HaveInsertPoint()) 2612 return; 2613 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2614 llvm::Value *Args[] = { 2615 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2616 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 2617 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 2618 Args); 2619 } 2620 2621 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2622 OpenMPProcBindClauseKind ProcBind, 2623 SourceLocation Loc) { 2624 if (!CGF.HaveInsertPoint()) 2625 return; 2626 // Constants for proc bind value accepted by the runtime. 2627 enum ProcBindTy { 2628 ProcBindFalse = 0, 2629 ProcBindTrue, 2630 ProcBindMaster, 2631 ProcBindClose, 2632 ProcBindSpread, 2633 ProcBindIntel, 2634 ProcBindDefault 2635 } RuntimeProcBind; 2636 switch (ProcBind) { 2637 case OMPC_PROC_BIND_master: 2638 RuntimeProcBind = ProcBindMaster; 2639 break; 2640 case OMPC_PROC_BIND_close: 2641 RuntimeProcBind = ProcBindClose; 2642 break; 2643 case OMPC_PROC_BIND_spread: 2644 RuntimeProcBind = ProcBindSpread; 2645 break; 2646 case OMPC_PROC_BIND_unknown: 2647 llvm_unreachable("Unsupported proc_bind value."); 2648 } 2649 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2650 llvm::Value *Args[] = { 2651 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2652 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 2653 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 2654 } 2655 2656 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2657 SourceLocation Loc) { 2658 if (!CGF.HaveInsertPoint()) 2659 return; 2660 // Build call void __kmpc_flush(ident_t *loc) 2661 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 2662 emitUpdateLocation(CGF, Loc)); 2663 } 2664 2665 namespace { 2666 /// \brief 
Indexes of fields for type kmp_task_t. 2667 enum KmpTaskTFields { 2668 /// \brief List of shared variables. 2669 KmpTaskTShareds, 2670 /// \brief Task routine. 2671 KmpTaskTRoutine, 2672 /// \brief Partition id for the untied tasks. 2673 KmpTaskTPartId, 2674 /// Function with call of destructors for private variables. 2675 Data1, 2676 /// Task priority. 2677 Data2, 2678 /// (Taskloops only) Lower bound. 2679 KmpTaskTLowerBound, 2680 /// (Taskloops only) Upper bound. 2681 KmpTaskTUpperBound, 2682 /// (Taskloops only) Stride. 2683 KmpTaskTStride, 2684 /// (Taskloops only) Is last iteration flag. 2685 KmpTaskTLastIter, 2686 }; 2687 } // anonymous namespace 2688 2689 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2690 // FIXME: Add other entries type when they become supported. 2691 return OffloadEntriesTargetRegion.empty(); 2692 } 2693 2694 /// \brief Initialize target region entry. 2695 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2696 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2697 StringRef ParentName, unsigned LineNum, 2698 unsigned Order) { 2699 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2700 "only required for the device " 2701 "code generation."); 2702 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2703 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr); 2704 ++OffloadingEntriesNum; 2705 } 2706 2707 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2708 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2709 StringRef ParentName, unsigned LineNum, 2710 llvm::Constant *Addr, llvm::Constant *ID) { 2711 // If we are emitting code for a target, the entry is already initialized, 2712 // only has to be registered. 
2713 if (CGM.getLangOpts().OpenMPIsDevice) { 2714 assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 2715 "Entry must exist."); 2716 auto &Entry = 2717 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 2718 assert(Entry.isValid() && "Entry not initialized!"); 2719 Entry.setAddress(Addr); 2720 Entry.setID(ID); 2721 return; 2722 } else { 2723 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID); 2724 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 2725 } 2726 } 2727 2728 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 2729 unsigned DeviceID, unsigned FileID, StringRef ParentName, 2730 unsigned LineNum) const { 2731 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 2732 if (PerDevice == OffloadEntriesTargetRegion.end()) 2733 return false; 2734 auto PerFile = PerDevice->second.find(FileID); 2735 if (PerFile == PerDevice->second.end()) 2736 return false; 2737 auto PerParentName = PerFile->second.find(ParentName); 2738 if (PerParentName == PerFile->second.end()) 2739 return false; 2740 auto PerLine = PerParentName->second.find(LineNum); 2741 if (PerLine == PerParentName->second.end()) 2742 return false; 2743 // Fail if this entry is already registered. 2744 if (PerLine->second.getAddress() || PerLine->second.getID()) 2745 return false; 2746 return true; 2747 } 2748 2749 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 2750 const OffloadTargetRegionEntryInfoActTy &Action) { 2751 // Scan all target region entries and perform the provided action. 2752 for (auto &D : OffloadEntriesTargetRegion) 2753 for (auto &F : D.second) 2754 for (auto &P : F.second) 2755 for (auto &L : P.second) 2756 Action(D.first, F.first, P.first(), L.first, L.second); 2757 } 2758 2759 /// \brief Create a Ctor/Dtor-like function whose body is emitted through 2760 /// \a Codegen. 
This is used to emit the two functions that register and 2761 /// unregister the descriptor of the current compilation unit. 2762 static llvm::Function * 2763 createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name, 2764 const RegionCodeGenTy &Codegen) { 2765 auto &C = CGM.getContext(); 2766 FunctionArgList Args; 2767 ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(), 2768 /*Id=*/nullptr, C.VoidPtrTy); 2769 Args.push_back(&DummyPtr); 2770 2771 CodeGenFunction CGF(CGM); 2772 auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2773 auto FTy = CGM.getTypes().GetFunctionType(FI); 2774 auto *Fn = 2775 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation()); 2776 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation()); 2777 Codegen(CGF); 2778 CGF.FinishFunction(); 2779 return Fn; 2780 } 2781 2782 llvm::Function * 2783 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { 2784 2785 // If we don't have entries or if we are emitting code for the device, we 2786 // don't need to do anything. 2787 if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) 2788 return nullptr; 2789 2790 auto &M = CGM.getModule(); 2791 auto &C = CGM.getContext(); 2792 2793 // Get list of devices we care about 2794 auto &Devices = CGM.getLangOpts().OMPTargetTriples; 2795 2796 // We should be creating an offloading descriptor only if there are devices 2797 // specified. 2798 assert(!Devices.empty() && "No OpenMP offloading devices??"); 2799 2800 // Create the external variables that will point to the begin and end of the 2801 // host entries section. These will be defined by the linker. 
2802 auto *OffloadEntryTy = 2803 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); 2804 llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable( 2805 M, OffloadEntryTy, /*isConstant=*/true, 2806 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 2807 ".omp_offloading.entries_begin"); 2808 llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable( 2809 M, OffloadEntryTy, /*isConstant=*/true, 2810 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 2811 ".omp_offloading.entries_end"); 2812 2813 // Create all device images 2814 auto *DeviceImageTy = cast<llvm::StructType>( 2815 CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); 2816 ConstantInitBuilder DeviceImagesBuilder(CGM); 2817 auto DeviceImagesEntries = DeviceImagesBuilder.beginArray(DeviceImageTy); 2818 2819 for (unsigned i = 0; i < Devices.size(); ++i) { 2820 StringRef T = Devices[i].getTriple(); 2821 auto *ImgBegin = new llvm::GlobalVariable( 2822 M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, 2823 /*Initializer=*/nullptr, 2824 Twine(".omp_offloading.img_start.") + Twine(T)); 2825 auto *ImgEnd = new llvm::GlobalVariable( 2826 M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, 2827 /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T)); 2828 2829 auto Dev = DeviceImagesEntries.beginStruct(DeviceImageTy); 2830 Dev.add(ImgBegin); 2831 Dev.add(ImgEnd); 2832 Dev.add(HostEntriesBegin); 2833 Dev.add(HostEntriesEnd); 2834 Dev.finishAndAddTo(DeviceImagesEntries); 2835 } 2836 2837 // Create device images global array. 
2838 llvm::GlobalVariable *DeviceImages = 2839 DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images", 2840 CGM.getPointerAlign(), 2841 /*isConstant=*/true); 2842 DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 2843 2844 // This is a Zero array to be used in the creation of the constant expressions 2845 llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), 2846 llvm::Constant::getNullValue(CGM.Int32Ty)}; 2847 2848 // Create the target region descriptor. 2849 auto *BinaryDescriptorTy = cast<llvm::StructType>( 2850 CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy())); 2851 ConstantInitBuilder DescBuilder(CGM); 2852 auto DescInit = DescBuilder.beginStruct(BinaryDescriptorTy); 2853 DescInit.addInt(CGM.Int32Ty, Devices.size()); 2854 DescInit.add(llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), 2855 DeviceImages, 2856 Index)); 2857 DescInit.add(HostEntriesBegin); 2858 DescInit.add(HostEntriesEnd); 2859 2860 auto *Desc = DescInit.finishAndCreateGlobal(".omp_offloading.descriptor", 2861 CGM.getPointerAlign(), 2862 /*isConstant=*/true); 2863 2864 // Emit code to register or unregister the descriptor at execution 2865 // startup or closing, respectively. 2866 2867 // Create a variable to drive the registration and unregistration of the 2868 // descriptor, so we can reuse the logic that emits Ctors and Dtors. 
2869 auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var"); 2870 ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(), 2871 IdentInfo, C.CharTy); 2872 2873 auto *UnRegFn = createOffloadingBinaryDescriptorFunction( 2874 CGM, ".omp_offloading.descriptor_unreg", 2875 [&](CodeGenFunction &CGF, PrePostActionTy &) { 2876 CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib), 2877 Desc); 2878 }); 2879 auto *RegFn = createOffloadingBinaryDescriptorFunction( 2880 CGM, ".omp_offloading.descriptor_reg", 2881 [&](CodeGenFunction &CGF, PrePostActionTy &) { 2882 CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib), 2883 Desc); 2884 CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); 2885 }); 2886 return RegFn; 2887 } 2888 2889 void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID, 2890 llvm::Constant *Addr, uint64_t Size) { 2891 StringRef Name = Addr->getName(); 2892 auto *TgtOffloadEntryType = cast<llvm::StructType>( 2893 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy())); 2894 llvm::LLVMContext &C = CGM.getModule().getContext(); 2895 llvm::Module &M = CGM.getModule(); 2896 2897 // Make sure the address has the right type. 2898 llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy); 2899 2900 // Create constant string with the name. 2901 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 2902 2903 llvm::GlobalVariable *Str = 2904 new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true, 2905 llvm::GlobalValue::InternalLinkage, StrPtrInit, 2906 ".omp_offloading.entry_name"); 2907 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 2908 llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy); 2909 2910 // We can't have any padding between symbols, so we need to have 1-byte 2911 // alignment. 2912 auto Align = CharUnits::fromQuantity(1); 2913 2914 // Create the entry struct. 
2915 ConstantInitBuilder EntryBuilder(CGM); 2916 auto EntryInit = EntryBuilder.beginStruct(TgtOffloadEntryType); 2917 EntryInit.add(AddrPtr); 2918 EntryInit.add(StrPtr); 2919 EntryInit.addInt(CGM.SizeTy, Size); 2920 llvm::GlobalVariable *Entry = 2921 EntryInit.finishAndCreateGlobal(".omp_offloading.entry", 2922 Align, 2923 /*constant*/ true, 2924 llvm::GlobalValue::ExternalLinkage); 2925 2926 // The entry has to be created in the section the linker expects it to be. 2927 Entry->setSection(".omp_offloading.entries"); 2928 } 2929 2930 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 2931 // Emit the offloading entries and metadata so that the device codegen side 2932 // can easily figure out what to emit. The produced metadata looks like 2933 // this: 2934 // 2935 // !omp_offload.info = !{!1, ...} 2936 // 2937 // Right now we only generate metadata for function that contain target 2938 // regions. 2939 2940 // If we do not have entries, we dont need to do anything. 2941 if (OffloadEntriesInfoManager.empty()) 2942 return; 2943 2944 llvm::Module &M = CGM.getModule(); 2945 llvm::LLVMContext &C = M.getContext(); 2946 SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> 2947 OrderedEntries(OffloadEntriesInfoManager.size()); 2948 2949 // Create the offloading info metadata node. 2950 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 2951 2952 // Auxiliar methods to create metadata values and strings. 
2953 auto getMDInt = [&](unsigned v) { 2954 return llvm::ConstantAsMetadata::get( 2955 llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v)); 2956 }; 2957 2958 auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); }; 2959 2960 // Create function that emits metadata for each target region entry; 2961 auto &&TargetRegionMetadataEmitter = [&]( 2962 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line, 2963 OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 2964 llvm::SmallVector<llvm::Metadata *, 32> Ops; 2965 // Generate metadata for target regions. Each entry of this metadata 2966 // contains: 2967 // - Entry 0 -> Kind of this type of metadata (0). 2968 // - Entry 1 -> Device ID of the file where the entry was identified. 2969 // - Entry 2 -> File ID of the file where the entry was identified. 2970 // - Entry 3 -> Mangled name of the function where the entry was identified. 2971 // - Entry 4 -> Line in the file where the entry was identified. 2972 // - Entry 5 -> Order the entry was created. 2973 // The first element of the metadata node is the kind. 2974 Ops.push_back(getMDInt(E.getKind())); 2975 Ops.push_back(getMDInt(DeviceID)); 2976 Ops.push_back(getMDInt(FileID)); 2977 Ops.push_back(getMDString(ParentName)); 2978 Ops.push_back(getMDInt(Line)); 2979 Ops.push_back(getMDInt(E.getOrder())); 2980 2981 // Save this entry in the right position of the ordered entries array. 2982 OrderedEntries[E.getOrder()] = &E; 2983 2984 // Add metadata to the named metadata node. 
2985 MD->addOperand(llvm::MDNode::get(C, Ops)); 2986 }; 2987 2988 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 2989 TargetRegionMetadataEmitter); 2990 2991 for (auto *E : OrderedEntries) { 2992 assert(E && "All ordered entries must exist!"); 2993 if (auto *CE = 2994 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 2995 E)) { 2996 assert(CE->getID() && CE->getAddress() && 2997 "Entry ID and Addr are invalid!"); 2998 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0); 2999 } else 3000 llvm_unreachable("Unsupported entry kind."); 3001 } 3002 } 3003 3004 /// \brief Loads all the offload entries information from the host IR 3005 /// metadata. 3006 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3007 // If we are in target mode, load the metadata from the host IR. This code has 3008 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 3009 3010 if (!CGM.getLangOpts().OpenMPIsDevice) 3011 return; 3012 3013 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3014 return; 3015 3016 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3017 if (Buf.getError()) 3018 return; 3019 3020 llvm::LLVMContext C; 3021 auto ME = expectedToErrorOrAndEmitErrors( 3022 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3023 3024 if (ME.getError()) 3025 return; 3026 3027 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3028 if (!MD) 3029 return; 3030 3031 for (auto I : MD->operands()) { 3032 llvm::MDNode *MN = cast<llvm::MDNode>(I); 3033 3034 auto getMDInt = [&](unsigned Idx) { 3035 llvm::ConstantAsMetadata *V = 3036 cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3037 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3038 }; 3039 3040 auto getMDString = [&](unsigned Idx) { 3041 llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3042 return V->getString(); 3043 }; 3044 3045 switch (getMDInt(0)) { 3046 default: 3047 llvm_unreachable("Unexpected 
metadata!"); 3048 break; 3049 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3050 OFFLOAD_ENTRY_INFO_TARGET_REGION: 3051 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3052 /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2), 3053 /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4), 3054 /*Order=*/getMDInt(5)); 3055 break; 3056 } 3057 } 3058 } 3059 3060 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3061 if (!KmpRoutineEntryPtrTy) { 3062 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3063 auto &C = CGM.getContext(); 3064 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3065 FunctionProtoType::ExtProtoInfo EPI; 3066 KmpRoutineEntryPtrQTy = C.getPointerType( 3067 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3068 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3069 } 3070 } 3071 3072 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 3073 QualType FieldTy) { 3074 auto *Field = FieldDecl::Create( 3075 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 3076 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 3077 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 3078 Field->setAccess(AS_public); 3079 DC->addDecl(Field); 3080 return Field; 3081 } 3082 3083 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3084 3085 // Make sure the type of the entry is already created. This is the type we 3086 // have to create: 3087 // struct __tgt_offload_entry{ 3088 // void *addr; // Pointer to the offload entry info. 3089 // // (function or global) 3090 // char *name; // Name of the function or global. 3091 // size_t size; // Size of the entry info (0 if it a function). 
3092 // }; 3093 if (TgtOffloadEntryQTy.isNull()) { 3094 ASTContext &C = CGM.getContext(); 3095 auto *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3096 RD->startDefinition(); 3097 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3098 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3099 addFieldToRecordDecl(C, RD, C.getSizeType()); 3100 RD->completeDefinition(); 3101 TgtOffloadEntryQTy = C.getRecordType(RD); 3102 } 3103 return TgtOffloadEntryQTy; 3104 } 3105 3106 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { 3107 // These are the types we need to build: 3108 // struct __tgt_device_image{ 3109 // void *ImageStart; // Pointer to the target code start. 3110 // void *ImageEnd; // Pointer to the target code end. 3111 // // We also add the host entries to the device image, as it may be useful 3112 // // for the target runtime to have access to that information. 3113 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all 3114 // // the entries. 3115 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 3116 // // entries (non inclusive). 3117 // }; 3118 if (TgtDeviceImageQTy.isNull()) { 3119 ASTContext &C = CGM.getContext(); 3120 auto *RD = C.buildImplicitRecord("__tgt_device_image"); 3121 RD->startDefinition(); 3122 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3123 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3124 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3125 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3126 RD->completeDefinition(); 3127 TgtDeviceImageQTy = C.getRecordType(RD); 3128 } 3129 return TgtDeviceImageQTy; 3130 } 3131 3132 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { 3133 // struct __tgt_bin_desc{ 3134 // int32_t NumDevices; // Number of devices supported. 3135 // __tgt_device_image *DeviceImages; // Arrays of device images 3136 // // (one per device). 3137 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the 3138 // // entries. 
3139 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 3140 // // entries (non inclusive). 3141 // }; 3142 if (TgtBinaryDescriptorQTy.isNull()) { 3143 ASTContext &C = CGM.getContext(); 3144 auto *RD = C.buildImplicitRecord("__tgt_bin_desc"); 3145 RD->startDefinition(); 3146 addFieldToRecordDecl( 3147 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3148 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); 3149 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3150 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3151 RD->completeDefinition(); 3152 TgtBinaryDescriptorQTy = C.getRecordType(RD); 3153 } 3154 return TgtBinaryDescriptorQTy; 3155 } 3156 3157 namespace { 3158 struct PrivateHelpersTy { 3159 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 3160 const VarDecl *PrivateElemInit) 3161 : Original(Original), PrivateCopy(PrivateCopy), 3162 PrivateElemInit(PrivateElemInit) {} 3163 const VarDecl *Original; 3164 const VarDecl *PrivateCopy; 3165 const VarDecl *PrivateElemInit; 3166 }; 3167 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3168 } // anonymous namespace 3169 3170 static RecordDecl * 3171 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3172 if (!Privates.empty()) { 3173 auto &C = CGM.getContext(); 3174 // Build struct .kmp_privates_t. 
{ 3175 // /* private vars */ 3176 // }; 3177 auto *RD = C.buildImplicitRecord(".kmp_privates.t"); 3178 RD->startDefinition(); 3179 for (auto &&Pair : Privates) { 3180 auto *VD = Pair.second.Original; 3181 auto Type = VD->getType(); 3182 Type = Type.getNonReferenceType(); 3183 auto *FD = addFieldToRecordDecl(C, RD, Type); 3184 if (VD->hasAttrs()) { 3185 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3186 E(VD->getAttrs().end()); 3187 I != E; ++I) 3188 FD->addAttr(*I); 3189 } 3190 } 3191 RD->completeDefinition(); 3192 return RD; 3193 } 3194 return nullptr; 3195 } 3196 3197 static RecordDecl * 3198 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3199 QualType KmpInt32Ty, 3200 QualType KmpRoutineEntryPointerQTy) { 3201 auto &C = CGM.getContext(); 3202 // Build struct kmp_task_t { 3203 // void * shareds; 3204 // kmp_routine_entry_t routine; 3205 // kmp_int32 part_id; 3206 // kmp_cmplrdata_t data1; 3207 // kmp_cmplrdata_t data2; 3208 // For taskloops additional fields: 3209 // kmp_uint64 lb; 3210 // kmp_uint64 ub; 3211 // kmp_int64 st; 3212 // kmp_int32 liter; 3213 // }; 3214 auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3215 UD->startDefinition(); 3216 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3217 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3218 UD->completeDefinition(); 3219 QualType KmpCmplrdataTy = C.getRecordType(UD); 3220 auto *RD = C.buildImplicitRecord("kmp_task_t"); 3221 RD->startDefinition(); 3222 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3223 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3224 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3225 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3226 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3227 if (isOpenMPTaskLoopDirective(Kind)) { 3228 QualType KmpUInt64Ty = 3229 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3230 QualType KmpInt64Ty = 3231 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, 
/*Signed=*/1); 3232 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3233 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3234 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3235 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3236 } 3237 RD->completeDefinition(); 3238 return RD; 3239 } 3240 3241 static RecordDecl * 3242 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3243 ArrayRef<PrivateDataTy> Privates) { 3244 auto &C = CGM.getContext(); 3245 // Build struct kmp_task_t_with_privates { 3246 // kmp_task_t task_data; 3247 // .kmp_privates_t. privates; 3248 // }; 3249 auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3250 RD->startDefinition(); 3251 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3252 if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) { 3253 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3254 } 3255 RD->completeDefinition(); 3256 return RD; 3257 } 3258 3259 /// \brief Emit a proxy function which accepts kmp_task_t as the second 3260 /// argument. 
3261 /// \code 3262 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3263 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3264 /// For taskloops: 3265 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3266 /// tt->shareds); 3267 /// return 0; 3268 /// } 3269 /// \endcode 3270 static llvm::Value * 3271 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3272 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3273 QualType KmpTaskTWithPrivatesPtrQTy, 3274 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3275 QualType SharedsPtrTy, llvm::Value *TaskFunction, 3276 llvm::Value *TaskPrivatesMap) { 3277 auto &C = CGM.getContext(); 3278 FunctionArgList Args; 3279 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); 3280 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, 3281 /*Id=*/nullptr, 3282 KmpTaskTWithPrivatesPtrQTy.withRestrict()); 3283 Args.push_back(&GtidArg); 3284 Args.push_back(&TaskTypeArg); 3285 auto &TaskEntryFnInfo = 3286 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3287 auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3288 auto *TaskEntry = 3289 llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage, 3290 ".omp_task_entry.", &CGM.getModule()); 3291 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo); 3292 CodeGenFunction CGF(CGM); 3293 CGF.disableDebugInfo(); 3294 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); 3295 3296 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3297 // tt, 3298 // For taskloops: 3299 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3300 // tt->task_data.shareds); 3301 auto *GtidParam = CGF.EmitLoadOfScalar( 3302 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3303 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3304 
CGF.GetAddrOfLocalVar(&TaskTypeArg), 3305 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3306 auto *KmpTaskTWithPrivatesQTyRD = 3307 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3308 LValue Base = 3309 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3310 auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3311 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3312 auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3313 auto *PartidParam = PartIdLVal.getPointer(); 3314 3315 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3316 auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3317 auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3318 CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(), 3319 CGF.ConvertTypeForMem(SharedsPtrTy)); 3320 3321 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3322 llvm::Value *PrivatesParam; 3323 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3324 auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3325 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3326 PrivatesLVal.getPointer(), CGF.VoidPtrTy); 3327 } else 3328 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3329 3330 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3331 TaskPrivatesMap, 3332 CGF.Builder 3333 .CreatePointerBitCastOrAddrSpaceCast( 3334 TDBase.getAddress(), CGF.VoidPtrTy) 3335 .getPointer()}; 3336 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3337 std::end(CommonArgs)); 3338 if (isOpenMPTaskLoopDirective(Kind)) { 3339 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3340 auto LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3341 auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal(); 3342 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3343 auto 
UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3344 auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal(); 3345 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3346 auto StLVal = CGF.EmitLValueForField(Base, *StFI); 3347 auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal(); 3348 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3349 auto LILVal = CGF.EmitLValueForField(Base, *LIFI); 3350 auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal(); 3351 CallArgs.push_back(LBParam); 3352 CallArgs.push_back(UBParam); 3353 CallArgs.push_back(StParam); 3354 CallArgs.push_back(LIParam); 3355 } 3356 CallArgs.push_back(SharedsParam); 3357 3358 CGF.EmitCallOrInvoke(TaskFunction, CallArgs); 3359 CGF.EmitStoreThroughLValue( 3360 RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3361 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3362 CGF.FinishFunction(); 3363 return TaskEntry; 3364 } 3365 3366 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3367 SourceLocation Loc, 3368 QualType KmpInt32Ty, 3369 QualType KmpTaskTWithPrivatesPtrQTy, 3370 QualType KmpTaskTWithPrivatesQTy) { 3371 auto &C = CGM.getContext(); 3372 FunctionArgList Args; 3373 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); 3374 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, 3375 /*Id=*/nullptr, 3376 KmpTaskTWithPrivatesPtrQTy.withRestrict()); 3377 Args.push_back(&GtidArg); 3378 Args.push_back(&TaskTypeArg); 3379 FunctionType::ExtInfo Info; 3380 auto &DestructorFnInfo = 3381 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3382 auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo); 3383 auto *DestructorFn = 3384 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3385 ".omp_task_destructor.", &CGM.getModule()); 3386 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn, 3387 DestructorFnInfo); 3388 CodeGenFunction CGF(CGM); 3389 
CGF.disableDebugInfo(); 3390 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3391 Args); 3392 3393 LValue Base = CGF.EmitLoadOfPointerLValue( 3394 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3395 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3396 auto *KmpTaskTWithPrivatesQTyRD = 3397 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3398 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3399 Base = CGF.EmitLValueForField(Base, *FI); 3400 for (auto *Field : 3401 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3402 if (auto DtorKind = Field->getType().isDestructedType()) { 3403 auto FieldLValue = CGF.EmitLValueForField(Base, Field); 3404 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 3405 } 3406 } 3407 CGF.FinishFunction(); 3408 return DestructorFn; 3409 } 3410 3411 /// \brief Emit a privates mapping function for correct handling of private and 3412 /// firstprivate variables. 3413 /// \code 3414 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 3415 /// **noalias priv1,..., <tyn> **noalias privn) { 3416 /// *priv1 = &.privates.priv1; 3417 /// ...; 3418 /// *privn = &.privates.privn; 3419 /// } 3420 /// \endcode 3421 static llvm::Value * 3422 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3423 ArrayRef<const Expr *> PrivateVars, 3424 ArrayRef<const Expr *> FirstprivateVars, 3425 ArrayRef<const Expr *> LastprivateVars, 3426 QualType PrivatesQTy, 3427 ArrayRef<PrivateDataTy> Privates) { 3428 auto &C = CGM.getContext(); 3429 FunctionArgList Args; 3430 ImplicitParamDecl TaskPrivatesArg( 3431 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3432 C.getPointerType(PrivatesQTy).withConst().withRestrict()); 3433 Args.push_back(&TaskPrivatesArg); 3434 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 3435 unsigned Counter = 1; 3436 for (auto *E: PrivateVars) { 3437 Args.push_back(ImplicitParamDecl::Create( 3438 C, /*DC=*/nullptr, Loc, 3439 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 3440 .withConst() 3441 .withRestrict())); 3442 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3443 PrivateVarsPos[VD] = Counter; 3444 ++Counter; 3445 } 3446 for (auto *E : FirstprivateVars) { 3447 Args.push_back(ImplicitParamDecl::Create( 3448 C, /*DC=*/nullptr, Loc, 3449 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 3450 .withConst() 3451 .withRestrict())); 3452 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3453 PrivateVarsPos[VD] = Counter; 3454 ++Counter; 3455 } 3456 for (auto *E: LastprivateVars) { 3457 Args.push_back(ImplicitParamDecl::Create( 3458 C, /*DC=*/nullptr, Loc, 3459 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 3460 .withConst() 3461 .withRestrict())); 3462 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3463 PrivateVarsPos[VD] = Counter; 3464 ++Counter; 3465 } 3466 auto &TaskPrivatesMapFnInfo = 3467 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3468 auto 
*TaskPrivatesMapTy = 3469 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3470 auto *TaskPrivatesMap = llvm::Function::Create( 3471 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, 3472 ".omp_task_privates_map.", &CGM.getModule()); 3473 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap, 3474 TaskPrivatesMapFnInfo); 3475 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3476 CodeGenFunction CGF(CGM); 3477 CGF.disableDebugInfo(); 3478 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3479 TaskPrivatesMapFnInfo, Args); 3480 3481 // *privi = &.privates.privi; 3482 LValue Base = CGF.EmitLoadOfPointerLValue( 3483 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3484 TaskPrivatesArg.getType()->castAs<PointerType>()); 3485 auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3486 Counter = 0; 3487 for (auto *Field : PrivatesQTyRD->fields()) { 3488 auto FieldLVal = CGF.EmitLValueForField(Base, Field); 3489 auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3490 auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3491 auto RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3492 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 3493 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 3494 ++Counter; 3495 } 3496 CGF.FinishFunction(); 3497 return TaskPrivatesMap; 3498 } 3499 3500 static int array_pod_sort_comparator(const PrivateDataTy *P1, 3501 const PrivateDataTy *P2) { 3502 return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0); 3503 } 3504 3505 /// Emit initialization for private variables in task-based directives. 
/// \param KmpTaskSharedsPtr Address of the task's shareds; it is used only
/// when \a Data has firstprivate variables.
/// \param TDBase LValue of the kmp_task_t_with_privates record; its second
/// field is taken as the privates record.
/// \param Privates Privatized variables, walked in step with the fields of the
/// privates record.
/// \param ForDup When true, only private copies whose initializer is a
/// non-trivial CXXConstructExpr are initialized; when false, every private
/// copy that has an initializer is.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  auto &C = CGF.getContext();
  // FI first points at the privates field of the task record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // SrcBase is the shareds record that firstprivate copies are read from; it
  // stays default-constructed when there are no firstprivates.
  LValue SrcBase;
  if (!Data.FirstprivateVars.empty()) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
      cast<CapturedStmt>(*D.getAssociatedStmt()));
  // From here on FI iterates over the fields of the privates record itself.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (auto &&Pair : Privates) {
    auto *VD = Pair.second.PrivateCopy;
    auto *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit means the copy is initialized from the
      // original value found in the shareds record.
      if (auto *Elem = Pair.second.PrivateElemInit) {
        auto *OriginalVD = Pair.second.Original;
        auto *SharedField = CapturesInfo.lookup(OriginalVD);
        auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
        // Rebuild the lvalue with the declared alignment of the original
        // variable.
        SharedRefLValue = CGF.MakeAddrLValue(
            Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
            SharedRefLValue.getType(), AlignmentSource::Decl);
        QualType Type = OriginalVD->getType();
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
                                    SharedRefLValue.getAddress(), Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Map the helper variable to the current source element.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: map the helper variable to the original's address and
          // emit the initializer into the private copy.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else
        // No helper variable: the initializer does not read the original.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
    }
    // Advance even when nothing was emitted, to stay in step with Privates.
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
3584 static bool checkInitIsRequired(CodeGenFunction &CGF, 3585 ArrayRef<PrivateDataTy> Privates) { 3586 bool InitRequired = false; 3587 for (auto &&Pair : Privates) { 3588 auto *VD = Pair.second.PrivateCopy; 3589 auto *Init = VD->getAnyInitializer(); 3590 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3591 !CGF.isTrivialInitializer(Init)); 3592 } 3593 return InitRequired; 3594 } 3595 3596 3597 /// Emit task_dup function (for initialization of 3598 /// private/firstprivate/lastprivate vars and last_iter flag) 3599 /// \code 3600 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3601 /// lastpriv) { 3602 /// // setup lastprivate flag 3603 /// task_dst->last = lastpriv; 3604 /// // could be constructor calls here... 3605 /// } 3606 /// \endcode 3607 static llvm::Value * 3608 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3609 const OMPExecutableDirective &D, 3610 QualType KmpTaskTWithPrivatesPtrQTy, 3611 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3612 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3613 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3614 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3615 auto &C = CGM.getContext(); 3616 FunctionArgList Args; 3617 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, 3618 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); 3619 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, 3620 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); 3621 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, 3622 /*Id=*/nullptr, C.IntTy); 3623 Args.push_back(&DstArg); 3624 Args.push_back(&SrcArg); 3625 Args.push_back(&LastprivArg); 3626 auto &TaskDupFnInfo = 3627 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3628 auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3629 auto *TaskDup = 3630 llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage, 3631 ".omp_task_dup.", &CGM.getModule()); 3632 
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo); 3633 CodeGenFunction CGF(CGM); 3634 CGF.disableDebugInfo(); 3635 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args); 3636 3637 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3638 CGF.GetAddrOfLocalVar(&DstArg), 3639 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3640 // task_dst->liter = lastpriv; 3641 if (WithLastIter) { 3642 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3643 LValue Base = CGF.EmitLValueForField( 3644 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3645 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3646 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3647 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3648 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3649 } 3650 3651 // Emit initial values for private copies (if any). 3652 assert(!Privates.empty()); 3653 Address KmpTaskSharedsPtr = Address::invalid(); 3654 if (!Data.FirstprivateVars.empty()) { 3655 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3656 CGF.GetAddrOfLocalVar(&SrcArg), 3657 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3658 LValue Base = CGF.EmitLValueForField( 3659 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3660 KmpTaskSharedsPtr = Address( 3661 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 3662 Base, *std::next(KmpTaskTQTyRD->field_begin(), 3663 KmpTaskTShareds)), 3664 Loc), 3665 CGF.getNaturalTypeAlignment(SharedsTy)); 3666 } 3667 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 3668 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 3669 CGF.FinishFunction(); 3670 return TaskDup; 3671 } 3672 3673 /// Checks if destructor function is required to be generated. 3674 /// \return true if cleanups are required, false otherwise. 
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
  bool NeedsCleanup = false;
  // The second field is taken as the privates record and is dereferenced
  // unconditionally, so the record passed in must have one.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
  for (auto *FD : PrivateRD->fields()) {
    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
    // One destructible field is enough.
    if (NeedsCleanup)
      break;
  }
  return NeedsCleanup;
}

/// Emit the code that allocates and fills a task for directive \a D: collect
/// the privatized variables, build the task record types, emit the helper
/// functions (privates map, proxy entry, and optionally task-dup and
/// destructors), call __kmpc_omp_task_alloc, copy the shareds and initialize
/// the private copies.
/// \param TaskFunction The outlined task function; it is cast to
/// llvm::Function, and the type of its parameter at index 3 is used as the
/// type of the privates-map argument.
/// \param SharedsTy Type of the record with the shared variables.
/// \param Shareds Address of the shareds record copied into the new task.
/// \param Data Privatization lists, tied/final/priority settings.
/// \return The new task, its entry function, the task pointer cast to the
/// with-privates pointer type, the lvalue of the kmp_task_t data, the
/// kmp_task_t record declaration and, if emitted, the task-dup function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Value *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  auto &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // Each *Vars list is walked in step with its matching *Copies list.
  auto I = Data.PrivateCopies.begin();
  for (auto *E : Data.PrivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  // Only firstprivates carry an element-init helper variable.
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (auto *E : Data.FirstprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (auto *E : Data.LastprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  llvm::array_pod_sort(Privates.begin(), Privates.end(),
                       array_pod_sort_comparator);
  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // NOTE(review): the record is built once, with the directive kind of the
  // first caller, while createKmpTaskTRecordDecl adds the lb/ub/st/liter
  // fields only for taskloop directives. Confirm that KmpTaskTQTy is reset
  // elsewhere, or that mixing task and taskloop in one module is handled.
  if (KmpTaskTQTy.isNull()) {
    KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
        CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
  }
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  auto *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
  auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // Type of the outlined function's parameter at index 3, which receives the
  // privates map.
  auto *TaskPrivatesMapTy =
      std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
                3)
          ->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null map.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  auto *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // The final flag is selected at run time when Data.Final carries a value,
  // and folded to a constant otherwise.
  auto *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  auto *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      NewTask, KmpTaskTWithPrivatesPtrTy);
  // Base is the whole kmp_task_t_with_privates record; TDBase is its first
  // field, the kmp_task_t data.
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    // emitPrivatesInit takes the with-privates record (Base here), not the
    // kmp_task_t data.
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task-dup function is emitted only for taskloops that have
    // lastprivates or private copies with non-trivial constructors.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Value *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Value *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  auto &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
3895 enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 }; 3896 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 3897 RecordDecl *KmpDependInfoRD; 3898 QualType FlagsTy = 3899 C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 3900 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 3901 if (KmpDependInfoTy.isNull()) { 3902 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 3903 KmpDependInfoRD->startDefinition(); 3904 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 3905 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 3906 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 3907 KmpDependInfoRD->completeDefinition(); 3908 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 3909 } else 3910 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 3911 CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy); 3912 // Define type kmp_depend_info[<Dependences.size()>]; 3913 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 3914 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), 3915 ArrayType::Normal, /*IndexTypeQuals=*/0); 3916 // kmp_depend_info[<Dependences.size()>] deps; 3917 DependenciesArray = 3918 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 3919 for (unsigned i = 0; i < NumDependencies; ++i) { 3920 const Expr *E = Data.Dependences[i].second; 3921 auto Addr = CGF.EmitLValue(E); 3922 llvm::Value *Size; 3923 QualType Ty = E->getType(); 3924 if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 3925 LValue UpAddrLVal = 3926 CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false); 3927 llvm::Value *UpAddr = 3928 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); 3929 llvm::Value *LowIntPtr = 3930 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); 3931 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 3932 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 3933 } else 3934 Size = 
CGF.getTypeSize(Ty); 3935 auto Base = CGF.MakeAddrLValue( 3936 CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize), 3937 KmpDependInfoTy); 3938 // deps[i].base_addr = &<Dependences[i].second>; 3939 auto BaseAddrLVal = CGF.EmitLValueForField( 3940 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 3941 CGF.EmitStoreOfScalar( 3942 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), 3943 BaseAddrLVal); 3944 // deps[i].len = sizeof(<Dependences[i].second>); 3945 auto LenLVal = CGF.EmitLValueForField( 3946 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 3947 CGF.EmitStoreOfScalar(Size, LenLVal); 3948 // deps[i].flags = <Dependences[i].first>; 3949 RTLDependenceKindTy DepKind; 3950 switch (Data.Dependences[i].first) { 3951 case OMPC_DEPEND_in: 3952 DepKind = DepIn; 3953 break; 3954 // Out and InOut dependencies must use the same code. 3955 case OMPC_DEPEND_out: 3956 case OMPC_DEPEND_inout: 3957 DepKind = DepInOut; 3958 break; 3959 case OMPC_DEPEND_source: 3960 case OMPC_DEPEND_sink: 3961 case OMPC_DEPEND_unknown: 3962 llvm_unreachable("Unknown task dependence type"); 3963 } 3964 auto FlagsLVal = CGF.EmitLValueForField( 3965 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 3966 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 3967 FlagsLVal); 3968 } 3969 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3970 CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()), 3971 CGF.VoidPtrTy); 3972 } 3973 3974 // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() 3975 // libcall. 
3976 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 3977 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 3978 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 3979 // list is not empty 3980 auto *ThreadID = getThreadID(CGF, Loc); 3981 auto *UpLoc = emitUpdateLocation(CGF, Loc); 3982 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 3983 llvm::Value *DepTaskArgs[7]; 3984 if (NumDependencies) { 3985 DepTaskArgs[0] = UpLoc; 3986 DepTaskArgs[1] = ThreadID; 3987 DepTaskArgs[2] = NewTask; 3988 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); 3989 DepTaskArgs[4] = DependenciesArray.getPointer(); 3990 DepTaskArgs[5] = CGF.Builder.getInt32(0); 3991 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3992 } 3993 auto &&ThenCodeGen = [this, Loc, &Data, TDBase, KmpTaskTQTyRD, 3994 NumDependencies, &TaskArgs, 3995 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 3996 if (!Data.Tied) { 3997 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3998 auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 3999 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 4000 } 4001 if (NumDependencies) { 4002 CGF.EmitRuntimeCall( 4003 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); 4004 } else { 4005 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 4006 TaskArgs); 4007 } 4008 // Check if parent region is untied and build return for untied task; 4009 if (auto *Region = 4010 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 4011 Region->emitUntiedSwitch(CGF); 4012 }; 4013 4014 llvm::Value *DepWaitTaskArgs[6]; 4015 if (NumDependencies) { 4016 DepWaitTaskArgs[0] = UpLoc; 4017 DepWaitTaskArgs[1] = ThreadID; 4018 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); 4019 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 4020 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 4021 DepWaitTaskArgs[5] = 
llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4022 } 4023 auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 4024 NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF, 4025 PrePostActionTy &) { 4026 auto &RT = CGF.CGM.getOpenMPRuntime(); 4027 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 4028 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 4029 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 4030 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 4031 // is specified. 4032 if (NumDependencies) 4033 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), 4034 DepWaitTaskArgs); 4035 // Call proxy_task_entry(gtid, new_task); 4036 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy]( 4037 CodeGenFunction &CGF, PrePostActionTy &Action) { 4038 Action.Enter(CGF); 4039 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 4040 CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); 4041 }; 4042 4043 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 4044 // kmp_task_t *new_task); 4045 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 4046 // kmp_task_t *new_task); 4047 RegionCodeGenTy RCG(CodeGen); 4048 CommonActionTy Action( 4049 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, 4050 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); 4051 RCG.setAction(Action); 4052 RCG(CGF); 4053 }; 4054 4055 if (IfCond) 4056 emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 4057 else { 4058 RegionCodeGenTy ThenRCG(ThenCodeGen); 4059 ThenRCG(CGF); 4060 } 4061 } 4062 4063 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 4064 const OMPLoopDirective &D, 4065 llvm::Value *TaskFunction, 4066 QualType SharedsTy, Address Shareds, 4067 const Expr *IfCond, 4068 const OMPTaskDataTy &Data) { 4069 if (!CGF.HaveInsertPoint()) 4070 return; 4071 TaskResultTy Result = 4072 
emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 4073 // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() 4074 // libcall. 4075 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 4076 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 4077 // sched, kmp_uint64 grainsize, void *task_dup); 4078 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4079 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 4080 llvm::Value *IfVal; 4081 if (IfCond) { 4082 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 4083 /*isSigned=*/true); 4084 } else 4085 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 4086 4087 LValue LBLVal = CGF.EmitLValueForField( 4088 Result.TDBase, 4089 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 4090 auto *LBVar = 4091 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 4092 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), 4093 /*IsInitializer=*/true); 4094 LValue UBLVal = CGF.EmitLValueForField( 4095 Result.TDBase, 4096 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 4097 auto *UBVar = 4098 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 4099 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), 4100 /*IsInitializer=*/true); 4101 LValue StLVal = CGF.EmitLValueForField( 4102 Result.TDBase, 4103 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 4104 auto *StVar = 4105 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 4106 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), 4107 /*IsInitializer=*/true); 4108 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 4109 llvm::Value *TaskArgs[] = { 4110 UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(), 4111 UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()), 
4112 llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0), 4113 llvm::ConstantInt::getSigned( 4114 CGF.IntTy, Data.Schedule.getPointer() 4115 ? Data.Schedule.getInt() ? NumTasks : Grainsize 4116 : NoSchedule), 4117 Data.Schedule.getPointer() 4118 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 4119 /*isSigned=*/false) 4120 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 4121 Result.TaskDupFn 4122 ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn, 4123 CGF.VoidPtrTy) 4124 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 4125 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); 4126 } 4127 4128 /// \brief Emit reduction operation for each element of array (required for 4129 /// array sections) LHS op = RHS. 4130 /// \param Type Type of array. 4131 /// \param LHSVar Variable on the left side of the reduction operation 4132 /// (references element of array in original variable). 4133 /// \param RHSVar Variable on the right side of the reduction operation 4134 /// (references element of array in original variable). 4135 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 4136 /// RHSVar. 4137 static void EmitOMPAggregateReduction( 4138 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 4139 const VarDecl *RHSVar, 4140 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 4141 const Expr *, const Expr *)> &RedOpGen, 4142 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 4143 const Expr *UpExpr = nullptr) { 4144 // Perform element-by-element initialization. 4145 QualType ElementTy; 4146 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 4147 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 4148 4149 // Drill down to the base element type on both arrays. 
4150 auto ArrayTy = Type->getAsArrayTypeUnsafe(); 4151 auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 4152 4153 auto RHSBegin = RHSAddr.getPointer(); 4154 auto LHSBegin = LHSAddr.getPointer(); 4155 // Cast from pointer to array type to pointer to single element. 4156 auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 4157 // The basic structure here is a while-do loop. 4158 auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 4159 auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 4160 auto IsEmpty = 4161 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 4162 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 4163 4164 // Enter the loop body, making that address the current address. 4165 auto EntryBB = CGF.Builder.GetInsertBlock(); 4166 CGF.EmitBlock(BodyBB); 4167 4168 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 4169 4170 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 4171 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 4172 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 4173 Address RHSElementCurrent = 4174 Address(RHSElementPHI, 4175 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 4176 4177 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 4178 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 4179 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 4180 Address LHSElementCurrent = 4181 Address(LHSElementPHI, 4182 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 4183 4184 // Emit copy. 4185 CodeGenFunction::OMPPrivateScope Scope(CGF); 4186 Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; }); 4187 Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; }); 4188 Scope.Privatize(); 4189 RedOpGen(CGF, XExpr, EExpr, UpExpr); 4190 Scope.ForceCleanup(); 4191 4192 // Shift the address forward by one element. 
4193 auto LHSElementNext = CGF.Builder.CreateConstGEP1_32( 4194 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 4195 auto RHSElementNext = CGF.Builder.CreateConstGEP1_32( 4196 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 4197 // Check whether we've reached the end. 4198 auto Done = 4199 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 4200 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 4201 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 4202 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 4203 4204 // Done. 4205 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 4206 } 4207 4208 /// Emit reduction combiner. If the combiner is a simple expression emit it as 4209 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 4210 /// UDR combiner function. 4211 static void emitReductionCombiner(CodeGenFunction &CGF, 4212 const Expr *ReductionOp) { 4213 if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) 4214 if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 4215 if (auto *DRE = 4216 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 4217 if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 4218 std::pair<llvm::Function *, llvm::Function *> Reduction = 4219 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 4220 RValue Func = RValue::get(Reduction.first); 4221 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 4222 CGF.EmitIgnoredExpr(ReductionOp); 4223 return; 4224 } 4225 CGF.EmitIgnoredExpr(ReductionOp); 4226 } 4227 4228 static llvm::Value *emitReductionFunction(CodeGenModule &CGM, 4229 llvm::Type *ArgsType, 4230 ArrayRef<const Expr *> Privates, 4231 ArrayRef<const Expr *> LHSExprs, 4232 ArrayRef<const Expr *> RHSExprs, 4233 ArrayRef<const Expr *> ReductionOps) { 4234 auto &C = CGM.getContext(); 4235 4236 // void reduction_func(void *LHSArg, void *RHSArg); 4237 FunctionArgList Args; 4238 ImplicitParamDecl LHSArg(C, 
/*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 4239 C.VoidPtrTy); 4240 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 4241 C.VoidPtrTy); 4242 Args.push_back(&LHSArg); 4243 Args.push_back(&RHSArg); 4244 auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4245 auto *Fn = llvm::Function::Create( 4246 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 4247 ".omp.reduction.reduction_func", &CGM.getModule()); 4248 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); 4249 CodeGenFunction CGF(CGM); 4250 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 4251 4252 // Dst = (void*[n])(LHSArg); 4253 // Src = (void*[n])(RHSArg); 4254 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4255 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 4256 ArgsType), CGF.getPointerAlign()); 4257 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4258 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 4259 ArgsType), CGF.getPointerAlign()); 4260 4261 // ... 4262 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 4263 // ... 4264 CodeGenFunction::OMPPrivateScope Scope(CGF); 4265 auto IPriv = Privates.begin(); 4266 unsigned Idx = 0; 4267 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 4268 auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 4269 Scope.addPrivate(RHSVar, [&]() -> Address { 4270 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 4271 }); 4272 auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 4273 Scope.addPrivate(LHSVar, [&]() -> Address { 4274 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 4275 }); 4276 QualType PrivTy = (*IPriv)->getType(); 4277 if (PrivTy->isVariablyModifiedType()) { 4278 // Get array size and emit VLA type. 
4279 ++Idx; 4280 Address Elem = 4281 CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize()); 4282 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 4283 auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy); 4284 auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 4285 CodeGenFunction::OpaqueValueMapping OpaqueMap( 4286 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 4287 CGF.EmitVariablyModifiedType(PrivTy); 4288 } 4289 } 4290 Scope.Privatize(); 4291 IPriv = Privates.begin(); 4292 auto ILHS = LHSExprs.begin(); 4293 auto IRHS = RHSExprs.begin(); 4294 for (auto *E : ReductionOps) { 4295 if ((*IPriv)->getType()->isArrayType()) { 4296 // Emit reduction for array section. 4297 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 4298 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 4299 EmitOMPAggregateReduction( 4300 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 4301 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 4302 emitReductionCombiner(CGF, E); 4303 }); 4304 } else 4305 // Emit reduction for array subscript or single variable. 4306 emitReductionCombiner(CGF, E); 4307 ++IPriv; 4308 ++ILHS; 4309 ++IRHS; 4310 } 4311 Scope.ForceCleanup(); 4312 CGF.FinishFunction(); 4313 return Fn; 4314 } 4315 4316 static void emitSingleReductionCombiner(CodeGenFunction &CGF, 4317 const Expr *ReductionOp, 4318 const Expr *PrivateRef, 4319 const DeclRefExpr *LHS, 4320 const DeclRefExpr *RHS) { 4321 if (PrivateRef->getType()->isArrayType()) { 4322 // Emit reduction for array section. 4323 auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 4324 auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 4325 EmitOMPAggregateReduction( 4326 CGF, PrivateRef->getType(), LHSVar, RHSVar, 4327 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 4328 emitReductionCombiner(CGF, ReductionOp); 4329 }); 4330 } else 4331 // Emit reduction for array subscript or single variable. 
4332 emitReductionCombiner(CGF, ReductionOp); 4333 } 4334 4335 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 4336 ArrayRef<const Expr *> Privates, 4337 ArrayRef<const Expr *> LHSExprs, 4338 ArrayRef<const Expr *> RHSExprs, 4339 ArrayRef<const Expr *> ReductionOps, 4340 bool WithNowait, bool SimpleReduction) { 4341 if (!CGF.HaveInsertPoint()) 4342 return; 4343 // Next code should be emitted for reduction: 4344 // 4345 // static kmp_critical_name lock = { 0 }; 4346 // 4347 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 4348 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 4349 // ... 4350 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 4351 // *(Type<n>-1*)rhs[<n>-1]); 4352 // } 4353 // 4354 // ... 4355 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 4356 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 4357 // RedList, reduce_func, &<lock>)) { 4358 // case 1: 4359 // ... 4360 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 4361 // ... 4362 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 4363 // break; 4364 // case 2: 4365 // ... 4366 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 4367 // ... 4368 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 4369 // break; 4370 // default:; 4371 // } 4372 // 4373 // if SimpleReduction is true, only the next code is generated: 4374 // ... 4375 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 4376 // ... 
/// \brief Emit the final combination step of a reduction.
///
/// Unless \a SimpleReduction is set, the generated code builds a list of
/// pointers to the RHS items, emits a helper reduce function, calls
/// __kmpc_reduce{_nowait} and switches on its result: case 1 combines the
/// items directly, case 2 combines them atomically (or inside a critical
/// region when no simple atomic form is recognized).
///
/// \param CGF Function being emitted; nothing is generated when it has no
/// insertion point.
/// \param Loc Source location used for the ident_t argument and thread id.
/// \param Privates Private copies; their types select scalar/array/VLA
/// handling.
/// \param LHSExprs References to the LHS helper variables.
/// \param RHSExprs References to the RHS helper variables.
/// \param ReductionOps Combiner expressions, one per reduction item.
/// \param WithNowait Selects the *_nowait runtime entry points.
/// \param SimpleReduction When true only the plain combiners are emitted,
/// without any runtime calls.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    bool WithNowait, bool SimpleReduction) {
  if (!CGF.HaveInsertPoint())
    return;
  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  auto &C = CGM.getContext();

  if (SimpleReduction) {
    // Only the combiners, inside their own cleanup scope.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (auto *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (auto *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in RedList; it runs ahead of I by one for every variably
  // modified private seen so far.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem =
      CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
                                             CGF.getPointerSize());
      // NOTE: this local 'Size' shadows the slot count declared above. The
      // size is stored in the pointer slot via inttoptr.
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .first,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  auto *ReductionFn = emitReductionFunction(
      CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  auto *Lock = getCriticalRegionLock(".reduction");

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  auto *ThreadId = getThreadID(CGF, Loc);
  auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  // <n> is the number of reduction items (RHSExprs.size()), not the number of
  // slots in RedList; the size argument covers the whole list.
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  auto Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Plain (non-atomic) combiners; the Action parameter is not used here.
  auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (auto *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // No runtime call is attached in the first (presumably "enter") position;
  // the end-reduce call goes in the second (presumably "exit") position.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (auto *E : ReductionOps) {
      // Decompose "X = <update>" so it can be tried as a simple atomic
      // update. XExpr stays null when E is not an assignment.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // NOTE: the pointer 'BO' declared in the condition shadows the opcode
      // 'BO' above for the extent of this if statement only.
      if (auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      auto *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          // Remember the right operand and the opcode of the update.
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD, IPriv,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          // The callback computes the updated value from a given value of X:
          // VD is remapped to a temporary holding that value and UpExpr is
          // evaluated against it.
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() -> Address {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          auto &RT = CGF.CGM.getOpenMPRuntime();
          RT.emitCriticalRegion(
              CGF, ".atomic_reduction",
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else
          CritRedGen(CGF, nullptr, nullptr, nullptr);
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else
    // In the nowait form no end-reduce call is attached to case 2.
    AtomicRCG(CGF);

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
4635 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 4636 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 4637 Region->emitUntiedSwitch(CGF); 4638 } 4639 4640 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 4641 OpenMPDirectiveKind InnerKind, 4642 const RegionCodeGenTy &CodeGen, 4643 bool HasCancel) { 4644 if (!CGF.HaveInsertPoint()) 4645 return; 4646 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 4647 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 4648 } 4649 4650 namespace { 4651 enum RTCancelKind { 4652 CancelNoreq = 0, 4653 CancelParallel = 1, 4654 CancelLoop = 2, 4655 CancelSections = 3, 4656 CancelTaskgroup = 4 4657 }; 4658 } // anonymous namespace 4659 4660 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 4661 RTCancelKind CancelKind = CancelNoreq; 4662 if (CancelRegion == OMPD_parallel) 4663 CancelKind = CancelParallel; 4664 else if (CancelRegion == OMPD_for) 4665 CancelKind = CancelLoop; 4666 else if (CancelRegion == OMPD_sections) 4667 CancelKind = CancelSections; 4668 else { 4669 assert(CancelRegion == OMPD_taskgroup); 4670 CancelKind = CancelTaskgroup; 4671 } 4672 return CancelKind; 4673 } 4674 4675 void CGOpenMPRuntime::emitCancellationPointCall( 4676 CodeGenFunction &CGF, SourceLocation Loc, 4677 OpenMPDirectiveKind CancelRegion) { 4678 if (!CGF.HaveInsertPoint()) 4679 return; 4680 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 4681 // global_tid, kmp_int32 cncl_kind); 4682 if (auto *OMPRegionInfo = 4683 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 4684 if (OMPRegionInfo->hasCancel()) { 4685 llvm::Value *Args[] = { 4686 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 4687 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 4688 // Ignore return result until untied tasks are supported. 
4689 auto *Result = CGF.EmitRuntimeCall( 4690 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 4691 // if (__kmpc_cancellationpoint()) { 4692 // __kmpc_cancel_barrier(); 4693 // exit from construct; 4694 // } 4695 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 4696 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 4697 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 4698 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 4699 CGF.EmitBlock(ExitBB); 4700 // __kmpc_cancel_barrier(); 4701 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 4702 // exit from construct; 4703 auto CancelDest = 4704 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 4705 CGF.EmitBranchThroughCleanup(CancelDest); 4706 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 4707 } 4708 } 4709 } 4710 4711 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 4712 const Expr *IfCond, 4713 OpenMPDirectiveKind CancelRegion) { 4714 if (!CGF.HaveInsertPoint()) 4715 return; 4716 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 4717 // kmp_int32 cncl_kind); 4718 if (auto *OMPRegionInfo = 4719 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 4720 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 4721 PrePostActionTy &) { 4722 auto &RT = CGF.CGM.getOpenMPRuntime(); 4723 llvm::Value *Args[] = { 4724 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 4725 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 4726 // Ignore return result until untied tasks are supported. 
4727 auto *Result = CGF.EmitRuntimeCall( 4728 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 4729 // if (__kmpc_cancel()) { 4730 // __kmpc_cancel_barrier(); 4731 // exit from construct; 4732 // } 4733 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 4734 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 4735 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 4736 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 4737 CGF.EmitBlock(ExitBB); 4738 // __kmpc_cancel_barrier(); 4739 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 4740 // exit from construct; 4741 auto CancelDest = 4742 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 4743 CGF.EmitBranchThroughCleanup(CancelDest); 4744 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 4745 }; 4746 if (IfCond) 4747 emitOMPIfClause(CGF, IfCond, ThenGen, 4748 [](CodeGenFunction &, PrePostActionTy &) {}); 4749 else { 4750 RegionCodeGenTy ThenRCG(ThenGen); 4751 ThenRCG(CGF); 4752 } 4753 } 4754 } 4755 4756 /// \brief Obtain information that uniquely identifies a target entry. This 4757 /// consists of the file and device IDs as well as line number associated with 4758 /// the relevant entry source location. 
4759 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 4760 unsigned &DeviceID, unsigned &FileID, 4761 unsigned &LineNum) { 4762 4763 auto &SM = C.getSourceManager(); 4764 4765 // The loc should be always valid and have a file ID (the user cannot use 4766 // #pragma directives in macros) 4767 4768 assert(Loc.isValid() && "Source location is expected to be always valid."); 4769 assert(Loc.isFileID() && "Source location is expected to refer to a file."); 4770 4771 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 4772 assert(PLoc.isValid() && "Source location is expected to be always valid."); 4773 4774 llvm::sys::fs::UniqueID ID; 4775 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 4776 llvm_unreachable("Source file with target region no longer exists!"); 4777 4778 DeviceID = ID.getDevice(); 4779 FileID = ID.getFile(); 4780 LineNum = PLoc.getLine(); 4781 } 4782 4783 void CGOpenMPRuntime::emitTargetOutlinedFunction( 4784 const OMPExecutableDirective &D, StringRef ParentName, 4785 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 4786 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 4787 assert(!ParentName.empty() && "Invalid target region parent name!"); 4788 4789 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 4790 IsOffloadEntry, CodeGen); 4791 } 4792 4793 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 4794 const OMPExecutableDirective &D, StringRef ParentName, 4795 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 4796 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 4797 // Create a unique name for the entry function using the source location 4798 // information of the current target region. 
The name will be something like: 4799 // 4800 // __omp_offloading_DD_FFFF_PP_lBB 4801 // 4802 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 4803 // mangled name of the function that encloses the target region and BB is the 4804 // line number of the target region. 4805 4806 unsigned DeviceID; 4807 unsigned FileID; 4808 unsigned Line; 4809 getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID, 4810 Line); 4811 SmallString<64> EntryFnName; 4812 { 4813 llvm::raw_svector_ostream OS(EntryFnName); 4814 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 4815 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 4816 } 4817 4818 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 4819 4820 CodeGenFunction CGF(CGM, true); 4821 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 4822 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 4823 4824 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); 4825 4826 // If this target outline function is not an offload entry, we don't need to 4827 // register it. 4828 if (!IsOffloadEntry) 4829 return; 4830 4831 // The target region ID is used by the runtime library to identify the current 4832 // target region, so it only has to be unique and not necessarily point to 4833 // anything. It could be the pointer to the outlined function that implements 4834 // the target region, but we aren't using that so that the compiler doesn't 4835 // need to keep that, and could therefore inline the host function if proven 4836 // worthwhile during optimization. In the other hand, if emitting code for the 4837 // device, the ID has to be the function address so that it can retrieved from 4838 // the offloading entry and launched by the runtime library. We also mark the 4839 // outlined function to have external linkage in case we are emitting code for 4840 // the device, because these functions will be entry points to the device. 
4841 4842 if (CGM.getLangOpts().OpenMPIsDevice) { 4843 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 4844 OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage); 4845 } else 4846 OutlinedFnID = new llvm::GlobalVariable( 4847 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 4848 llvm::GlobalValue::PrivateLinkage, 4849 llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id"); 4850 4851 // Register the information for the entry associated with this target region. 4852 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 4853 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID); 4854 } 4855 4856 /// discard all CompoundStmts intervening between two constructs 4857 static const Stmt *ignoreCompoundStmts(const Stmt *Body) { 4858 while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body)) 4859 Body = CS->body_front(); 4860 4861 return Body; 4862 } 4863 4864 /// \brief Emit the num_teams clause of an enclosed teams directive at the 4865 /// target region scope. If there is no teams directive associated with the 4866 /// target directive, or if there is no num_teams clause associated with the 4867 /// enclosed teams directive, return nullptr. 4868 static llvm::Value * 4869 emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, 4870 CodeGenFunction &CGF, 4871 const OMPExecutableDirective &D) { 4872 4873 assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " 4874 "teams directive expected to be " 4875 "emitted only for the host!"); 4876 4877 // FIXME: For the moment we do not support combined directives with target and 4878 // teams, so we do not expect to get any num_teams clause in the provided 4879 // directive. Once we support that, this assertion can be replaced by the 4880 // actual emission of the clause expression. 
4881 assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr && 4882 "Not expecting clause in directive."); 4883 4884 // If the current target region has a teams region enclosed, we need to get 4885 // the number of teams to pass to the runtime function call. This is done 4886 // by generating the expression in a inlined region. This is required because 4887 // the expression is captured in the enclosing target environment when the 4888 // teams directive is not combined with target. 4889 4890 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 4891 4892 // FIXME: Accommodate other combined directives with teams when they become 4893 // available. 4894 if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>( 4895 ignoreCompoundStmts(CS.getCapturedStmt()))) { 4896 if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { 4897 CGOpenMPInnerExprInfo CGInfo(CGF, CS); 4898 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 4899 llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); 4900 return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty, 4901 /*IsSigned=*/true); 4902 } 4903 4904 // If we have an enclosed teams directive but no num_teams clause we use 4905 // the default value 0. 4906 return CGF.Builder.getInt32(0); 4907 } 4908 4909 // No teams associated with the directive. 4910 return nullptr; 4911 } 4912 4913 /// \brief Emit the thread_limit clause of an enclosed teams directive at the 4914 /// target region scope. If there is no teams directive associated with the 4915 /// target directive, or if there is no thread_limit clause associated with the 4916 /// enclosed teams directive, return nullptr. 
4917 static llvm::Value * 4918 emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, 4919 CodeGenFunction &CGF, 4920 const OMPExecutableDirective &D) { 4921 4922 assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " 4923 "teams directive expected to be " 4924 "emitted only for the host!"); 4925 4926 // FIXME: For the moment we do not support combined directives with target and 4927 // teams, so we do not expect to get any thread_limit clause in the provided 4928 // directive. Once we support that, this assertion can be replaced by the 4929 // actual emission of the clause expression. 4930 assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr && 4931 "Not expecting clause in directive."); 4932 4933 // If the current target region has a teams region enclosed, we need to get 4934 // the thread limit to pass to the runtime function call. This is done 4935 // by generating the expression in a inlined region. This is required because 4936 // the expression is captured in the enclosing target environment when the 4937 // teams directive is not combined with target. 4938 4939 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 4940 4941 // FIXME: Accommodate other combined directives with teams when they become 4942 // available. 4943 if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>( 4944 ignoreCompoundStmts(CS.getCapturedStmt()))) { 4945 if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { 4946 CGOpenMPInnerExprInfo CGInfo(CGF, CS); 4947 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 4948 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); 4949 return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, 4950 /*IsSigned=*/true); 4951 } 4952 4953 // If we have an enclosed teams directive but no thread_limit clause we use 4954 // the default value 0. 4955 return CGF.Builder.getInt32(0); 4956 } 4957 4958 // No teams associated with the directive. 
4959 return nullptr; 4960 } 4961 4962 namespace { 4963 // \brief Utility to handle information from clauses associated with a given 4964 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 4965 // It provides a convenient interface to obtain the information and generate 4966 // code for that information. 4967 class MappableExprsHandler { 4968 public: 4969 /// \brief Values for bit flags used to specify the mapping type for 4970 /// offloading. 4971 enum OpenMPOffloadMappingFlags { 4972 /// \brief Allocate memory on the device and move data from host to device. 4973 OMP_MAP_TO = 0x01, 4974 /// \brief Allocate memory on the device and move data from device to host. 4975 OMP_MAP_FROM = 0x02, 4976 /// \brief Always perform the requested mapping action on the element, even 4977 /// if it was already mapped before. 4978 OMP_MAP_ALWAYS = 0x04, 4979 /// \brief Delete the element from the device environment, ignoring the 4980 /// current reference count associated with the element. 4981 OMP_MAP_DELETE = 0x08, 4982 /// \brief The element being mapped is a pointer, therefore the pointee 4983 /// should be mapped as well. 4984 OMP_MAP_IS_PTR = 0x10, 4985 /// \brief This flags signals that an argument is the first one relating to 4986 /// a map/private clause expression. For some cases a single 4987 /// map/privatization results in multiple arguments passed to the runtime 4988 /// library. 4989 OMP_MAP_FIRST_REF = 0x20, 4990 /// \brief Signal that the runtime library has to return the device pointer 4991 /// in the current position for the data being mapped. 4992 OMP_MAP_RETURN_PTR = 0x40, 4993 /// \brief This flag signals that the reference being passed is a pointer to 4994 /// private data. 4995 OMP_MAP_PRIVATE_PTR = 0x80, 4996 /// \brief Pass the element to the device by value. 4997 OMP_MAP_PRIVATE_VAL = 0x100, 4998 }; 4999 5000 /// Class that associates information with a base pointer to be passed to the 5001 /// runtime library. 
5002 class BasePointerInfo { 5003 /// The base pointer. 5004 llvm::Value *Ptr = nullptr; 5005 /// The base declaration that refers to this device pointer, or null if 5006 /// there is none. 5007 const ValueDecl *DevPtrDecl = nullptr; 5008 5009 public: 5010 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 5011 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 5012 llvm::Value *operator*() const { return Ptr; } 5013 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 5014 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 5015 }; 5016 5017 typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy; 5018 typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy; 5019 typedef SmallVector<unsigned, 16> MapFlagsArrayTy; 5020 5021 private: 5022 /// \brief Directive from where the map clauses were extracted. 5023 const OMPExecutableDirective &CurDir; 5024 5025 /// \brief Function the directive is being generated for. 5026 CodeGenFunction &CGF; 5027 5028 /// \brief Set of all first private variables in the current directive. 5029 llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls; 5030 5031 /// Map between device pointer declarations and their expression components. 5032 /// The key value for declarations in 'this' is null. 5033 llvm::DenseMap< 5034 const ValueDecl *, 5035 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 5036 DevPointersMap; 5037 5038 llvm::Value *getExprTypeSize(const Expr *E) const { 5039 auto ExprTy = E->getType().getCanonicalType(); 5040 5041 // Reference types are ignored for mapping purposes. 5042 if (auto *RefTy = ExprTy->getAs<ReferenceType>()) 5043 ExprTy = RefTy->getPointeeType().getCanonicalType(); 5044 5045 // Given that an array section is considered a built-in type, we need to 5046 // do the calculation based on the length of the section instead of relying 5047 // on CGF.getTypeSize(E->getType()). 
5048 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 5049 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 5050 OAE->getBase()->IgnoreParenImpCasts()) 5051 .getCanonicalType(); 5052 5053 // If there is no length associated with the expression, that means we 5054 // are using the whole length of the base. 5055 if (!OAE->getLength() && OAE->getColonLoc().isValid()) 5056 return CGF.getTypeSize(BaseTy); 5057 5058 llvm::Value *ElemSize; 5059 if (auto *PTy = BaseTy->getAs<PointerType>()) 5060 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 5061 else { 5062 auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 5063 assert(ATy && "Expecting array type if not a pointer type."); 5064 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 5065 } 5066 5067 // If we don't have a length at this point, that is because we have an 5068 // array section with a single element. 5069 if (!OAE->getLength()) 5070 return ElemSize; 5071 5072 auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); 5073 LengthVal = 5074 CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); 5075 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 5076 } 5077 return CGF.getTypeSize(ExprTy); 5078 } 5079 5080 /// \brief Return the corresponding bits for a given map clause modifier. Add 5081 /// a flag marking the map as a pointer if requested. Add a flag marking the 5082 /// map as the first one of a series of maps that relate to the same map 5083 /// expression. 5084 unsigned getMapTypeBits(OpenMPMapClauseKind MapType, 5085 OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag, 5086 bool AddIsFirstFlag) const { 5087 unsigned Bits = 0u; 5088 switch (MapType) { 5089 case OMPC_MAP_alloc: 5090 case OMPC_MAP_release: 5091 // alloc and release is the default behavior in the runtime library, i.e. 5092 // if we don't pass any bits alloc/release that is what the runtime is 5093 // going to do. 
Therefore, we don't need to signal anything for these two 5094 // type modifiers. 5095 break; 5096 case OMPC_MAP_to: 5097 Bits = OMP_MAP_TO; 5098 break; 5099 case OMPC_MAP_from: 5100 Bits = OMP_MAP_FROM; 5101 break; 5102 case OMPC_MAP_tofrom: 5103 Bits = OMP_MAP_TO | OMP_MAP_FROM; 5104 break; 5105 case OMPC_MAP_delete: 5106 Bits = OMP_MAP_DELETE; 5107 break; 5108 default: 5109 llvm_unreachable("Unexpected map type!"); 5110 break; 5111 } 5112 if (AddPtrFlag) 5113 Bits |= OMP_MAP_IS_PTR; 5114 if (AddIsFirstFlag) 5115 Bits |= OMP_MAP_FIRST_REF; 5116 if (MapTypeModifier == OMPC_MAP_always) 5117 Bits |= OMP_MAP_ALWAYS; 5118 return Bits; 5119 } 5120 5121 /// \brief Return true if the provided expression is a final array section. A 5122 /// final array section, is one whose length can't be proved to be one. 5123 bool isFinalArraySectionExpression(const Expr *E) const { 5124 auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 5125 5126 // It is not an array section and therefore not a unity-size one. 5127 if (!OASE) 5128 return false; 5129 5130 // An array section with no colon always refer to a single element. 5131 if (OASE->getColonLoc().isInvalid()) 5132 return false; 5133 5134 auto *Length = OASE->getLength(); 5135 5136 // If we don't have a length we have to check if the array has size 1 5137 // for this dimension. Also, we should always expect a length if the 5138 // base type is pointer. 5139 if (!Length) { 5140 auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 5141 OASE->getBase()->IgnoreParenImpCasts()) 5142 .getCanonicalType(); 5143 if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 5144 return ATy->getSize().getSExtValue() != 1; 5145 // If we don't have a constant dimension length, we have to consider 5146 // the current section as having any size, so it is not necessarily 5147 // unitary. If it happen to be unity size, that's user fault. 5148 return true; 5149 } 5150 5151 // Check if the length evaluates to 1. 
5152 llvm::APSInt ConstLength; 5153 if (!Length->EvaluateAsInt(ConstLength, CGF.getContext())) 5154 return true; // Can have more that size 1. 5155 5156 return ConstLength.getSExtValue() != 1; 5157 } 5158 5159 /// \brief Generate the base pointers, section pointers, sizes and map type 5160 /// bits for the provided map type, map modifier, and expression components. 5161 /// \a IsFirstComponent should be set to true if the provided set of 5162 /// components is the first associated with a capture. 5163 void generateInfoForComponentList( 5164 OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, 5165 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 5166 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 5167 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 5168 bool IsFirstComponentList) const { 5169 5170 // The following summarizes what has to be generated for each map and the 5171 // types bellow. The generated information is expressed in this order: 5172 // base pointer, section pointer, size, flags 5173 // (to add to the ones that come from the map type and modifier). 
5174 // 5175 // double d; 5176 // int i[100]; 5177 // float *p; 5178 // 5179 // struct S1 { 5180 // int i; 5181 // float f[50]; 5182 // } 5183 // struct S2 { 5184 // int i; 5185 // float f[50]; 5186 // S1 s; 5187 // double *p; 5188 // struct S2 *ps; 5189 // } 5190 // S2 s; 5191 // S2 *ps; 5192 // 5193 // map(d) 5194 // &d, &d, sizeof(double), noflags 5195 // 5196 // map(i) 5197 // &i, &i, 100*sizeof(int), noflags 5198 // 5199 // map(i[1:23]) 5200 // &i(=&i[0]), &i[1], 23*sizeof(int), noflags 5201 // 5202 // map(p) 5203 // &p, &p, sizeof(float*), noflags 5204 // 5205 // map(p[1:24]) 5206 // p, &p[1], 24*sizeof(float), noflags 5207 // 5208 // map(s) 5209 // &s, &s, sizeof(S2), noflags 5210 // 5211 // map(s.i) 5212 // &s, &(s.i), sizeof(int), noflags 5213 // 5214 // map(s.s.f) 5215 // &s, &(s.i.f), 50*sizeof(int), noflags 5216 // 5217 // map(s.p) 5218 // &s, &(s.p), sizeof(double*), noflags 5219 // 5220 // map(s.p[:22], s.a s.b) 5221 // &s, &(s.p), sizeof(double*), noflags 5222 // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag 5223 // 5224 // map(s.ps) 5225 // &s, &(s.ps), sizeof(S2*), noflags 5226 // 5227 // map(s.ps->s.i) 5228 // &s, &(s.ps), sizeof(S2*), noflags 5229 // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag 5230 // 5231 // map(s.ps->ps) 5232 // &s, &(s.ps), sizeof(S2*), noflags 5233 // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag 5234 // 5235 // map(s.ps->ps->ps) 5236 // &s, &(s.ps), sizeof(S2*), noflags 5237 // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag 5238 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 5239 // 5240 // map(s.ps->ps->s.f[:22]) 5241 // &s, &(s.ps), sizeof(S2*), noflags 5242 // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag 5243 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag 5244 // 5245 // map(ps) 5246 // &ps, &ps, sizeof(S2*), noflags 5247 // 5248 // map(ps->i) 5249 // ps, &(ps->i), sizeof(int), noflags 5250 // 5251 // 
map(ps->s.f) 5252 // ps, &(ps->s.f[0]), 50*sizeof(float), noflags 5253 // 5254 // map(ps->p) 5255 // ps, &(ps->p), sizeof(double*), noflags 5256 // 5257 // map(ps->p[:22]) 5258 // ps, &(ps->p), sizeof(double*), noflags 5259 // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag 5260 // 5261 // map(ps->ps) 5262 // ps, &(ps->ps), sizeof(S2*), noflags 5263 // 5264 // map(ps->ps->s.i) 5265 // ps, &(ps->ps), sizeof(S2*), noflags 5266 // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag 5267 // 5268 // map(ps->ps->ps) 5269 // ps, &(ps->ps), sizeof(S2*), noflags 5270 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 5271 // 5272 // map(ps->ps->ps->ps) 5273 // ps, &(ps->ps), sizeof(S2*), noflags 5274 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 5275 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 5276 // 5277 // map(ps->ps->ps->s.f[:22]) 5278 // ps, &(ps->ps), sizeof(S2*), noflags 5279 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 5280 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + 5281 // extra_flag 5282 5283 // Track if the map information being generated is the first for a capture. 5284 bool IsCaptureFirstInfo = IsFirstComponentList; 5285 5286 // Scan the components from the base to the complete expression. 5287 auto CI = Components.rbegin(); 5288 auto CE = Components.rend(); 5289 auto I = CI; 5290 5291 // Track if the map information being generated is the first for a list of 5292 // components. 5293 bool IsExpressionFirstInfo = true; 5294 llvm::Value *BP = nullptr; 5295 5296 if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) { 5297 // The base is the 'this' pointer. The content of the pointer is going 5298 // to be the base of the field being mapped. 5299 BP = CGF.EmitScalarExpr(ME->getBase()); 5300 } else { 5301 // The base is the reference to the variable. 5302 // BP = &Var. 
5303 BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression())) 5304 .getPointer(); 5305 5306 // If the variable is a pointer and is being dereferenced (i.e. is not 5307 // the last component), the base has to be the pointer itself, not its 5308 // reference. References are ignored for mapping purposes. 5309 QualType Ty = 5310 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 5311 if (Ty->isAnyPointerType() && std::next(I) != CE) { 5312 auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty); 5313 BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(), 5314 Ty->castAs<PointerType>()) 5315 .getPointer(); 5316 5317 // We do not need to generate individual map information for the 5318 // pointer, it can be associated with the combined storage. 5319 ++I; 5320 } 5321 } 5322 5323 for (; I != CE; ++I) { 5324 auto Next = std::next(I); 5325 5326 // We need to generate the addresses and sizes if this is the last 5327 // component, if the component is a pointer or if it is an array section 5328 // whose length can't be proved to be one. If this is a pointer, it 5329 // becomes the base address for the following components. 5330 5331 // A final array section, is one whose length can't be proved to be one. 5332 bool IsFinalArraySection = 5333 isFinalArraySectionExpression(I->getAssociatedExpression()); 5334 5335 // Get information on whether the element is a pointer. Have to do a 5336 // special treatment for array sections given that they are built-in 5337 // types. 
5338 const auto *OASE = 5339 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 5340 bool IsPointer = 5341 (OASE && 5342 OMPArraySectionExpr::getBaseOriginalType(OASE) 5343 .getCanonicalType() 5344 ->isAnyPointerType()) || 5345 I->getAssociatedExpression()->getType()->isAnyPointerType(); 5346 5347 if (Next == CE || IsPointer || IsFinalArraySection) { 5348 5349 // If this is not the last component, we expect the pointer to be 5350 // associated with an array expression or member expression. 5351 assert((Next == CE || 5352 isa<MemberExpr>(Next->getAssociatedExpression()) || 5353 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 5354 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && 5355 "Unexpected expression"); 5356 5357 auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer(); 5358 auto *Size = getExprTypeSize(I->getAssociatedExpression()); 5359 5360 // If we have a member expression and the current component is a 5361 // reference, we have to map the reference too. Whenever we have a 5362 // reference, the section that reference refers to is going to be a 5363 // load instruction from the storage assigned to the reference. 5364 if (isa<MemberExpr>(I->getAssociatedExpression()) && 5365 I->getAssociatedDeclaration()->getType()->isReferenceType()) { 5366 auto *LI = cast<llvm::LoadInst>(LB); 5367 auto *RefAddr = LI->getPointerOperand(); 5368 5369 BasePointers.push_back(BP); 5370 Pointers.push_back(RefAddr); 5371 Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); 5372 Types.push_back(getMapTypeBits( 5373 /*MapType*/ OMPC_MAP_alloc, /*MapTypeModifier=*/OMPC_MAP_unknown, 5374 !IsExpressionFirstInfo, IsCaptureFirstInfo)); 5375 IsExpressionFirstInfo = false; 5376 IsCaptureFirstInfo = false; 5377 // The reference will be the next base address. 
5378 BP = RefAddr; 5379 } 5380 5381 BasePointers.push_back(BP); 5382 Pointers.push_back(LB); 5383 Sizes.push_back(Size); 5384 5385 // We need to add a pointer flag for each map that comes from the 5386 // same expression except for the first one. We also need to signal 5387 // this map is the first one that relates with the current capture 5388 // (there is a set of entries for each capture). 5389 Types.push_back(getMapTypeBits(MapType, MapTypeModifier, 5390 !IsExpressionFirstInfo, 5391 IsCaptureFirstInfo)); 5392 5393 // If we have a final array section, we are done with this expression. 5394 if (IsFinalArraySection) 5395 break; 5396 5397 // The pointer becomes the base for the next element. 5398 if (Next != CE) 5399 BP = LB; 5400 5401 IsExpressionFirstInfo = false; 5402 IsCaptureFirstInfo = false; 5403 continue; 5404 } 5405 } 5406 } 5407 5408 /// \brief Return the adjusted map modifiers if the declaration a capture 5409 /// refers to appears in a first-private clause. This is expected to be used 5410 /// only with directives that start with 'target'. 5411 unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap, 5412 unsigned CurrentModifiers) { 5413 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 5414 5415 // A first private variable captured by reference will use only the 5416 // 'private ptr' and 'map to' flag. Return the right flags if the captured 5417 // declaration is known as first-private in this handler. 5418 if (FirstPrivateDecls.count(Cap.getCapturedVar())) 5419 return MappableExprsHandler::OMP_MAP_PRIVATE_PTR | 5420 MappableExprsHandler::OMP_MAP_TO; 5421 5422 // We didn't modify anything. 5423 return CurrentModifiers; 5424 } 5425 5426 public: 5427 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 5428 : CurDir(Dir), CGF(CGF) { 5429 // Extract firstprivate clause information. 
5430 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 5431 for (const auto *D : C->varlists()) 5432 FirstPrivateDecls.insert( 5433 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); 5434 // Extract device pointer clause information. 5435 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 5436 for (auto L : C->component_lists()) 5437 DevPointersMap[L.first].push_back(L.second); 5438 } 5439 5440 /// \brief Generate all the base pointers, section pointers, sizes and map 5441 /// types for the extracted mappable expressions. Also, for each item that 5442 /// relates with a device pointer, a pair of the relevant declaration and 5443 /// index where it occurs is appended to the device pointers info array. 5444 void generateAllInfo(MapBaseValuesArrayTy &BasePointers, 5445 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 5446 MapFlagsArrayTy &Types) const { 5447 BasePointers.clear(); 5448 Pointers.clear(); 5449 Sizes.clear(); 5450 Types.clear(); 5451 5452 struct MapInfo { 5453 /// Kind that defines how a device pointer has to be returned. 5454 enum ReturnPointerKind { 5455 // Don't have to return any pointer. 5456 RPK_None, 5457 // Pointer is the base of the declaration. 
5458 RPK_Base, 5459 // Pointer is a member of the base declaration - 'this' 5460 RPK_Member, 5461 // Pointer is a reference and a member of the base declaration - 'this' 5462 RPK_MemberReference, 5463 }; 5464 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 5465 OpenMPMapClauseKind MapType; 5466 OpenMPMapClauseKind MapTypeModifier; 5467 ReturnPointerKind ReturnDevicePointer; 5468 5469 MapInfo() 5470 : MapType(OMPC_MAP_unknown), MapTypeModifier(OMPC_MAP_unknown), 5471 ReturnDevicePointer(RPK_None) {} 5472 MapInfo( 5473 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 5474 OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, 5475 ReturnPointerKind ReturnDevicePointer) 5476 : Components(Components), MapType(MapType), 5477 MapTypeModifier(MapTypeModifier), 5478 ReturnDevicePointer(ReturnDevicePointer) {} 5479 }; 5480 5481 // We have to process the component lists that relate with the same 5482 // declaration in a single chunk so that we can generate the map flags 5483 // correctly. Therefore, we organize all lists in a map. 5484 llvm::DenseMap<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 5485 5486 // Helper function to fill the information map for the different supported 5487 // clauses. 5488 auto &&InfoGen = [&Info]( 5489 const ValueDecl *D, 5490 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 5491 OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier, 5492 MapInfo::ReturnPointerKind ReturnDevicePointer) { 5493 const ValueDecl *VD = 5494 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 5495 Info[VD].push_back({L, MapType, MapModifier, ReturnDevicePointer}); 5496 }; 5497 5498 // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 
5499 for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) 5500 for (auto L : C->component_lists()) 5501 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(), 5502 MapInfo::RPK_None); 5503 for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>()) 5504 for (auto L : C->component_lists()) 5505 InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown, 5506 MapInfo::RPK_None); 5507 for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>()) 5508 for (auto L : C->component_lists()) 5509 InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown, 5510 MapInfo::RPK_None); 5511 5512 // Look at the use_device_ptr clause information and mark the existing map 5513 // entries as such. If there is no map information for an entry in the 5514 // use_device_ptr list, we create one with map type 'alloc' and zero size 5515 // section. It is the user fault if that was not mapped before. 5516 // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 5517 for (auto *C : this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) 5518 for (auto L : C->component_lists()) { 5519 assert(!L.second.empty() && "Not expecting empty list of components!"); 5520 const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); 5521 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 5522 auto *IE = L.second.back().getAssociatedExpression(); 5523 // If the first component is a member expression, we have to look into 5524 // 'this', which maps to null in the map of map information. Otherwise 5525 // look directly for the information. 5526 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 5527 5528 // We potentially have map information for this declaration already. 5529 // Look for the first set of components that refer to it. 
5530 if (It != Info.end()) { 5531 auto CI = std::find_if( 5532 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { 5533 return MI.Components.back().getAssociatedDeclaration() == VD; 5534 }); 5535 // If we found a map entry, signal that the pointer has to be returned 5536 // and move on to the next declaration. 5537 if (CI != It->second.end()) { 5538 CI->ReturnDevicePointer = isa<MemberExpr>(IE) 5539 ? (VD->getType()->isReferenceType() 5540 ? MapInfo::RPK_MemberReference 5541 : MapInfo::RPK_Member) 5542 : MapInfo::RPK_Base; 5543 continue; 5544 } 5545 } 5546 5547 // We didn't find any match in our map information - generate a zero 5548 // size array section. 5549 // FIXME: MSVC 2013 seems to require this-> to find member CGF. 5550 llvm::Value *Ptr = 5551 this->CGF 5552 .EmitLoadOfLValue(this->CGF.EmitLValue(IE), SourceLocation()) 5553 .getScalarVal(); 5554 BasePointers.push_back({Ptr, VD}); 5555 Pointers.push_back(Ptr); 5556 Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy)); 5557 Types.push_back(OMP_MAP_RETURN_PTR | OMP_MAP_FIRST_REF); 5558 } 5559 5560 for (auto &M : Info) { 5561 // We need to know when we generate information for the first component 5562 // associated with a capture, because the mapping flags depend on it. 5563 bool IsFirstComponentList = true; 5564 for (MapInfo &L : M.second) { 5565 assert(!L.Components.empty() && 5566 "Not expecting declaration with no component lists."); 5567 5568 // Remember the current base pointer index. 5569 unsigned CurrentBasePointersIdx = BasePointers.size(); 5570 // FIXME: MSVC 2013 seems to require this-> to find the member method. 5571 this->generateInfoForComponentList(L.MapType, L.MapTypeModifier, 5572 L.Components, BasePointers, Pointers, 5573 Sizes, Types, IsFirstComponentList); 5574 5575 // If this entry relates with a device pointer, set the relevant 5576 // declaration and add the 'return pointer' flag. 
5577 if (IsFirstComponentList && 5578 L.ReturnDevicePointer != MapInfo::RPK_None) { 5579 // If the pointer is not the base of the map, we need to skip the 5580 // base. If it is a reference in a member field, we also need to skip 5581 // the map of the reference. 5582 if (L.ReturnDevicePointer != MapInfo::RPK_Base) { 5583 ++CurrentBasePointersIdx; 5584 if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference) 5585 ++CurrentBasePointersIdx; 5586 } 5587 assert(BasePointers.size() > CurrentBasePointersIdx && 5588 "Unexpected number of mapped base pointers."); 5589 5590 auto *RelevantVD = L.Components.back().getAssociatedDeclaration(); 5591 assert(RelevantVD && 5592 "No relevant declaration related with device pointer??"); 5593 5594 BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); 5595 Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PTR; 5596 } 5597 IsFirstComponentList = false; 5598 } 5599 } 5600 } 5601 5602 /// \brief Generate the base pointers, section pointers, sizes and map types 5603 /// associated to a given capture. 5604 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 5605 llvm::Value *Arg, 5606 MapBaseValuesArrayTy &BasePointers, 5607 MapValuesArrayTy &Pointers, 5608 MapValuesArrayTy &Sizes, 5609 MapFlagsArrayTy &Types) const { 5610 assert(!Cap->capturesVariableArrayType() && 5611 "Not expecting to generate map info for a variable array type!"); 5612 5613 BasePointers.clear(); 5614 Pointers.clear(); 5615 Sizes.clear(); 5616 Types.clear(); 5617 5618 // We need to know when we generating information for the first component 5619 // associated with a capture, because the mapping flags depend on it. 5620 bool IsFirstComponentList = true; 5621 5622 const ValueDecl *VD = 5623 Cap->capturesThis() 5624 ? nullptr 5625 : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl()); 5626 5627 // If this declaration appears in a is_device_ptr clause we just have to 5628 // pass the pointer by value. 
If it is a reference to a declaration, we just 5629 // pass its value, otherwise, if it is a member expression, we need to map 5630 // 'to' the field. 5631 if (!VD) { 5632 auto It = DevPointersMap.find(VD); 5633 if (It != DevPointersMap.end()) { 5634 for (auto L : It->second) { 5635 generateInfoForComponentList( 5636 /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L, 5637 BasePointers, Pointers, Sizes, Types, IsFirstComponentList); 5638 IsFirstComponentList = false; 5639 } 5640 return; 5641 } 5642 } else if (DevPointersMap.count(VD)) { 5643 BasePointers.push_back({Arg, VD}); 5644 Pointers.push_back(Arg); 5645 Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); 5646 Types.push_back(OMP_MAP_PRIVATE_VAL | OMP_MAP_FIRST_REF); 5647 return; 5648 } 5649 5650 // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 5651 for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) 5652 for (auto L : C->decl_component_lists(VD)) { 5653 assert(L.first == VD && 5654 "We got information for the wrong declaration??"); 5655 assert(!L.second.empty() && 5656 "Not expecting declaration with no component lists."); 5657 generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(), 5658 L.second, BasePointers, Pointers, Sizes, 5659 Types, IsFirstComponentList); 5660 IsFirstComponentList = false; 5661 } 5662 5663 return; 5664 } 5665 5666 /// \brief Generate the default map information for a given capture \a CI, 5667 /// record field declaration \a RI and captured value \a CV. 5668 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 5669 const FieldDecl &RI, llvm::Value *CV, 5670 MapBaseValuesArrayTy &CurBasePointers, 5671 MapValuesArrayTy &CurPointers, 5672 MapValuesArrayTy &CurSizes, 5673 MapFlagsArrayTy &CurMapTypes) { 5674 5675 // Do the default mapping. 
5676 if (CI.capturesThis()) { 5677 CurBasePointers.push_back(CV); 5678 CurPointers.push_back(CV); 5679 const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 5680 CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType())); 5681 // Default map type. 5682 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 5683 } else if (CI.capturesVariableByCopy()) { 5684 CurBasePointers.push_back(CV); 5685 CurPointers.push_back(CV); 5686 if (!RI.getType()->isAnyPointerType()) { 5687 // We have to signal to the runtime captures passed by value that are 5688 // not pointers. 5689 CurMapTypes.push_back(OMP_MAP_PRIVATE_VAL); 5690 CurSizes.push_back(CGF.getTypeSize(RI.getType())); 5691 } else { 5692 // Pointers are implicitly mapped with a zero size and no flags 5693 // (other than first map that is added for all implicit maps). 5694 CurMapTypes.push_back(0u); 5695 CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy)); 5696 } 5697 } else { 5698 assert(CI.capturesVariable() && "Expected captured reference."); 5699 CurBasePointers.push_back(CV); 5700 CurPointers.push_back(CV); 5701 5702 const ReferenceType *PtrTy = 5703 cast<ReferenceType>(RI.getType().getTypePtr()); 5704 QualType ElementType = PtrTy->getPointeeType(); 5705 CurSizes.push_back(CGF.getTypeSize(ElementType)); 5706 // The default map type for a scalar/complex type is 'to' because by 5707 // default the value doesn't have to be retrieved. For an aggregate 5708 // type, the default is 'tofrom'. 5709 CurMapTypes.push_back(ElementType->isAggregateType() 5710 ? (OMP_MAP_TO | OMP_MAP_FROM) 5711 : OMP_MAP_TO); 5712 5713 // If we have a capture by reference we may need to add the private 5714 // pointer flag if the base declaration shows in some first-private 5715 // clause. 5716 CurMapTypes.back() = 5717 adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back()); 5718 } 5719 // Every default map produces a single argument, so, it is always the 5720 // first one. 
5721 CurMapTypes.back() |= OMP_MAP_FIRST_REF; 5722 } 5723 }; 5724 5725 enum OpenMPOffloadingReservedDeviceIDs { 5726 /// \brief Device ID if the device was not defined, runtime should get it 5727 /// from environment variables in the spec. 5728 OMP_DEVICEID_UNDEF = -1, 5729 }; 5730 } // anonymous namespace 5731 5732 /// \brief Emit the arrays used to pass the captures and map information to the 5733 /// offloading runtime library. If there is no map or capture information, 5734 /// return nullptr by reference. 5735 static void 5736 emitOffloadingArrays(CodeGenFunction &CGF, 5737 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 5738 MappableExprsHandler::MapValuesArrayTy &Pointers, 5739 MappableExprsHandler::MapValuesArrayTy &Sizes, 5740 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 5741 CGOpenMPRuntime::TargetDataInfo &Info) { 5742 auto &CGM = CGF.CGM; 5743 auto &Ctx = CGF.getContext(); 5744 5745 // Reset the array information. 5746 Info.clearArrayInfo(); 5747 Info.NumberOfPtrs = BasePointers.size(); 5748 5749 if (Info.NumberOfPtrs) { 5750 // Detect if we have any capture size requiring runtime evaluation of the 5751 // size so that a constant array could be eventually used. 
5752 bool hasRuntimeEvaluationCaptureSize = false; 5753 for (auto *S : Sizes) 5754 if (!isa<llvm::Constant>(S)) { 5755 hasRuntimeEvaluationCaptureSize = true; 5756 break; 5757 } 5758 5759 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 5760 QualType PointerArrayType = 5761 Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, 5762 /*IndexTypeQuals=*/0); 5763 5764 Info.BasePointersArray = 5765 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 5766 Info.PointersArray = 5767 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 5768 5769 // If we don't have any VLA types or other types that require runtime 5770 // evaluation, we can use a constant array for the map sizes, otherwise we 5771 // need to fill up the arrays as we do for the pointers. 5772 if (hasRuntimeEvaluationCaptureSize) { 5773 QualType SizeArrayType = Ctx.getConstantArrayType( 5774 Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, 5775 /*IndexTypeQuals=*/0); 5776 Info.SizesArray = 5777 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 5778 } else { 5779 // We expect all the sizes to be constant, so we collect them to create 5780 // a constant array. 5781 SmallVector<llvm::Constant *, 16> ConstSizes; 5782 for (auto S : Sizes) 5783 ConstSizes.push_back(cast<llvm::Constant>(S)); 5784 5785 auto *SizesArrayInit = llvm::ConstantArray::get( 5786 llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); 5787 auto *SizesArrayGbl = new llvm::GlobalVariable( 5788 CGM.getModule(), SizesArrayInit->getType(), 5789 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 5790 SizesArrayInit, ".offload_sizes"); 5791 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 5792 Info.SizesArray = SizesArrayGbl; 5793 } 5794 5795 // The map types are always constant so we don't need to generate code to 5796 // fill arrays. Instead, we create an array constant. 
5797 llvm::Constant *MapTypesArrayInit = 5798 llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); 5799 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 5800 CGM.getModule(), MapTypesArrayInit->getType(), 5801 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 5802 MapTypesArrayInit, ".offload_maptypes"); 5803 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 5804 Info.MapTypesArray = MapTypesArrayGbl; 5805 5806 for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) { 5807 llvm::Value *BPVal = *BasePointers[i]; 5808 if (BPVal->getType()->isPointerTy()) 5809 BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy); 5810 else { 5811 assert(BPVal->getType()->isIntegerTy() && 5812 "If not a pointer, the value type must be an integer."); 5813 BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy); 5814 } 5815 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 5816 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 5817 Info.BasePointersArray, 0, i); 5818 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 5819 CGF.Builder.CreateStore(BPVal, BPAddr); 5820 5821 if (Info.requiresDevicePointerInfo()) 5822 if (auto *DevVD = BasePointers[i].getDevicePtrDecl()) 5823 Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr)); 5824 5825 llvm::Value *PVal = Pointers[i]; 5826 if (PVal->getType()->isPointerTy()) 5827 PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy); 5828 else { 5829 assert(PVal->getType()->isIntegerTy() && 5830 "If not a pointer, the value type must be an integer."); 5831 PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy); 5832 } 5833 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 5834 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 5835 Info.PointersArray, 0, i); 5836 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 5837 CGF.Builder.CreateStore(PVal, PAddr); 5838 5839 if (hasRuntimeEvaluationCaptureSize) { 5840 llvm::Value *S = 
CGF.Builder.CreateConstInBoundsGEP2_32( 5841 llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), 5842 Info.SizesArray, 5843 /*Idx0=*/0, 5844 /*Idx1=*/i); 5845 Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); 5846 CGF.Builder.CreateStore( 5847 CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true), 5848 SAddr); 5849 } 5850 } 5851 } 5852 } 5853 /// \brief Emit the arguments to be passed to the runtime library based on the 5854 /// arrays of pointers, sizes and map types. 5855 static void emitOffloadingArraysArgument( 5856 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 5857 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 5858 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 5859 auto &CGM = CGF.CGM; 5860 if (Info.NumberOfPtrs) { 5861 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5862 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 5863 Info.BasePointersArray, 5864 /*Idx0=*/0, /*Idx1=*/0); 5865 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5866 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 5867 Info.PointersArray, 5868 /*Idx0=*/0, 5869 /*Idx1=*/0); 5870 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5871 llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray, 5872 /*Idx0=*/0, /*Idx1=*/0); 5873 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5874 llvm::ArrayType::get(CGM.Int32Ty, Info.NumberOfPtrs), 5875 Info.MapTypesArray, 5876 /*Idx0=*/0, 5877 /*Idx1=*/0); 5878 } else { 5879 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 5880 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 5881 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); 5882 MapTypesArrayArg = 5883 llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()); 5884 } 5885 } 5886 5887 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, 5888 const OMPExecutableDirective &D, 5889 
llvm::Value *OutlinedFn, 5890 llvm::Value *OutlinedFnID, 5891 const Expr *IfCond, const Expr *Device, 5892 ArrayRef<llvm::Value *> CapturedVars) { 5893 if (!CGF.HaveInsertPoint()) 5894 return; 5895 5896 assert(OutlinedFn && "Invalid outlined function!"); 5897 5898 auto &Ctx = CGF.getContext(); 5899 5900 // Fill up the arrays with all the captured variables. 5901 MappableExprsHandler::MapValuesArrayTy KernelArgs; 5902 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 5903 MappableExprsHandler::MapValuesArrayTy Pointers; 5904 MappableExprsHandler::MapValuesArrayTy Sizes; 5905 MappableExprsHandler::MapFlagsArrayTy MapTypes; 5906 5907 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 5908 MappableExprsHandler::MapValuesArrayTy CurPointers; 5909 MappableExprsHandler::MapValuesArrayTy CurSizes; 5910 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 5911 5912 // Get mappable expression information. 5913 MappableExprsHandler MEHandler(D, CGF); 5914 5915 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 5916 auto RI = CS.getCapturedRecordDecl()->field_begin(); 5917 auto CV = CapturedVars.begin(); 5918 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 5919 CE = CS.capture_end(); 5920 CI != CE; ++CI, ++RI, ++CV) { 5921 StringRef Name; 5922 QualType Ty; 5923 5924 CurBasePointers.clear(); 5925 CurPointers.clear(); 5926 CurSizes.clear(); 5927 CurMapTypes.clear(); 5928 5929 // VLA sizes are passed to the outlined region by copy and do not have map 5930 // information associated. 5931 if (CI->capturesVariableArrayType()) { 5932 CurBasePointers.push_back(*CV); 5933 CurPointers.push_back(*CV); 5934 CurSizes.push_back(CGF.getTypeSize(RI->getType())); 5935 // Copy to the device as an argument. No need to retrieve it. 
5936 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL | 5937 MappableExprsHandler::OMP_MAP_FIRST_REF); 5938 } else { 5939 // If we have any information in the map clause, we use it, otherwise we 5940 // just do a default mapping. 5941 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, 5942 CurSizes, CurMapTypes); 5943 if (CurBasePointers.empty()) 5944 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, 5945 CurPointers, CurSizes, CurMapTypes); 5946 } 5947 // We expect to have at least an element of information for this capture. 5948 assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!"); 5949 assert(CurBasePointers.size() == CurPointers.size() && 5950 CurBasePointers.size() == CurSizes.size() && 5951 CurBasePointers.size() == CurMapTypes.size() && 5952 "Inconsistent map information sizes!"); 5953 5954 // The kernel args are always the first elements of the base pointers 5955 // associated with a capture. 5956 KernelArgs.push_back(*CurBasePointers.front()); 5957 // We need to append the results of this capture to what we already have. 5958 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 5959 Pointers.append(CurPointers.begin(), CurPointers.end()); 5960 Sizes.append(CurSizes.begin(), CurSizes.end()); 5961 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); 5962 } 5963 5964 // Keep track on whether the host function has to be executed. 5965 auto OffloadErrorQType = 5966 Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true); 5967 auto OffloadError = CGF.MakeAddrLValue( 5968 CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"), 5969 OffloadErrorQType); 5970 CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), 5971 OffloadError); 5972 5973 // Fill up the pointer arrays and transfer execution to the device. 
5974 auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, Device, 5975 OutlinedFnID, OffloadError, OffloadErrorQType, 5976 &D](CodeGenFunction &CGF, PrePostActionTy &) { 5977 auto &RT = CGF.CGM.getOpenMPRuntime(); 5978 // Emit the offloading arrays. 5979 TargetDataInfo Info; 5980 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 5981 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 5982 Info.PointersArray, Info.SizesArray, 5983 Info.MapTypesArray, Info); 5984 5985 // On top of the arrays that were filled up, the target offloading call 5986 // takes as arguments the device id as well as the host pointer. The host 5987 // pointer is used by the runtime library to identify the current target 5988 // region, so it only has to be unique and not necessarily point to 5989 // anything. It could be the pointer to the outlined function that 5990 // implements the target region, but we aren't using that so that the 5991 // compiler doesn't need to keep that, and could therefore inline the host 5992 // function if proven worthwhile during optimization. 5993 5994 // From this point on, we need to have an ID of the target region defined. 5995 assert(OutlinedFnID && "Invalid outlined function ID!"); 5996 5997 // Emit device ID if any. 5998 llvm::Value *DeviceID; 5999 if (Device) 6000 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 6001 CGF.Int32Ty, /*isSigned=*/true); 6002 else 6003 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 6004 6005 // Emit the number of elements in the offloading arrays. 6006 llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size()); 6007 6008 // Return value of the runtime offloading call. 6009 llvm::Value *Return; 6010 6011 auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D); 6012 auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D); 6013 6014 // If we have NumTeams defined this means that we have an enclosed teams 6015 // region. 
Therefore we also expect to have ThreadLimit defined. These two 6016 // values should be defined in the presence of a teams directive, regardless 6017 // of having any clauses associated. If the user is using teams but no 6018 // clauses, these two values will be the default that should be passed to 6019 // the runtime library - a 32-bit integer with the value zero. 6020 if (NumTeams) { 6021 assert(ThreadLimit && "Thread limit expression should be available along " 6022 "with number of teams."); 6023 llvm::Value *OffloadingArgs[] = { 6024 DeviceID, OutlinedFnID, 6025 PointerNum, Info.BasePointersArray, 6026 Info.PointersArray, Info.SizesArray, 6027 Info.MapTypesArray, NumTeams, 6028 ThreadLimit}; 6029 Return = CGF.EmitRuntimeCall( 6030 RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); 6031 } else { 6032 llvm::Value *OffloadingArgs[] = { 6033 DeviceID, OutlinedFnID, 6034 PointerNum, Info.BasePointersArray, 6035 Info.PointersArray, Info.SizesArray, 6036 Info.MapTypesArray}; 6037 Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target), 6038 OffloadingArgs); 6039 } 6040 6041 CGF.EmitStoreOfScalar(Return, OffloadError); 6042 }; 6043 6044 // Notify that the host version must be executed. 6045 auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) { 6046 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u), 6047 OffloadError); 6048 }; 6049 6050 // If we have a target function ID it means that we need to support 6051 // offloading, otherwise, just execute on the host. We need to execute on host 6052 // regardless of the conditional in the if clause if, e.g., the user do not 6053 // specify target triples. 
6054 if (OutlinedFnID) { 6055 if (IfCond) 6056 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 6057 else { 6058 RegionCodeGenTy ThenRCG(ThenGen); 6059 ThenRCG(CGF); 6060 } 6061 } else { 6062 RegionCodeGenTy ElseRCG(ElseGen); 6063 ElseRCG(CGF); 6064 } 6065 6066 // Check the error code and execute the host version if required. 6067 auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed"); 6068 auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont"); 6069 auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation()); 6070 auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal); 6071 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 6072 6073 CGF.EmitBlock(OffloadFailedBlock); 6074 CGF.Builder.CreateCall(OutlinedFn, KernelArgs); 6075 CGF.EmitBranch(OffloadContBlock); 6076 6077 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 6078 } 6079 6080 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 6081 StringRef ParentName) { 6082 if (!S) 6083 return; 6084 6085 // If we find a OMP target directive, codegen the outline function and 6086 // register the result. 6087 // FIXME: Add other directives with target when they become supported. 6088 bool isTargetDirective = isa<OMPTargetDirective>(S); 6089 6090 if (isTargetDirective) { 6091 auto *E = cast<OMPExecutableDirective>(S); 6092 unsigned DeviceID; 6093 unsigned FileID; 6094 unsigned Line; 6095 getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID, 6096 FileID, Line); 6097 6098 // Is this a target region that should not be emitted as an entry point? If 6099 // so just signal we are done with this target region. 
6100 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 6101 ParentName, Line)) 6102 return; 6103 6104 llvm::Function *Fn; 6105 llvm::Constant *Addr; 6106 std::tie(Fn, Addr) = 6107 CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction( 6108 CGM, cast<OMPTargetDirective>(*E), ParentName, 6109 /*isOffloadEntry=*/true); 6110 assert(Fn && Addr && "Target region emission failed."); 6111 return; 6112 } 6113 6114 if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) { 6115 if (!E->hasAssociatedStmt()) 6116 return; 6117 6118 scanForTargetRegionsFunctions( 6119 cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(), 6120 ParentName); 6121 return; 6122 } 6123 6124 // If this is a lambda function, look into its body. 6125 if (auto *L = dyn_cast<LambdaExpr>(S)) 6126 S = L->getBody(); 6127 6128 // Keep looking for target regions recursively. 6129 for (auto *II : S->children()) 6130 scanForTargetRegionsFunctions(II, ParentName); 6131 } 6132 6133 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 6134 auto &FD = *cast<FunctionDecl>(GD.getDecl()); 6135 6136 // If emitting code for the host, we do not process FD here. Instead we do 6137 // the normal code generation. 6138 if (!CGM.getLangOpts().OpenMPIsDevice) 6139 return false; 6140 6141 // Try to detect target regions in the function. 6142 scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD)); 6143 6144 // We should not emit any function other that the ones created during the 6145 // scanning. Therefore, we signal that this function is completely dealt 6146 // with. 6147 return true; 6148 } 6149 6150 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 6151 if (!CGM.getLangOpts().OpenMPIsDevice) 6152 return false; 6153 6154 // Check if there are Ctors/Dtors in this declaration and look for target 6155 // regions in it. We use the complete variant to produce the kernel name 6156 // mangling. 
6157 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 6158 if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 6159 for (auto *Ctor : RD->ctors()) { 6160 StringRef ParentName = 6161 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 6162 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 6163 } 6164 auto *Dtor = RD->getDestructor(); 6165 if (Dtor) { 6166 StringRef ParentName = 6167 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 6168 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 6169 } 6170 } 6171 6172 // If we are in target mode we do not emit any global (declare target is not 6173 // implemented yet). Therefore we signal that GD was processed in this case. 6174 return true; 6175 } 6176 6177 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 6178 auto *VD = GD.getDecl(); 6179 if (isa<FunctionDecl>(VD)) 6180 return emitTargetFunctions(GD); 6181 6182 return emitTargetGlobalVariable(GD); 6183 } 6184 6185 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { 6186 // If we have offloading in the current module, we need to emit the entries 6187 // now and register the offloading descriptor. 6188 createOffloadEntriesAndInfoMetadata(); 6189 6190 // Create and register the offloading binary descriptors. This is the main 6191 // entity that captures all the information about offloading in the current 6192 // compilation unit. 
6193 return createOffloadingBinaryDescriptorRegistration(); 6194 } 6195 6196 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 6197 const OMPExecutableDirective &D, 6198 SourceLocation Loc, 6199 llvm::Value *OutlinedFn, 6200 ArrayRef<llvm::Value *> CapturedVars) { 6201 if (!CGF.HaveInsertPoint()) 6202 return; 6203 6204 auto *RTLoc = emitUpdateLocation(CGF, Loc); 6205 CodeGenFunction::RunCleanupsScope Scope(CGF); 6206 6207 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 6208 llvm::Value *Args[] = { 6209 RTLoc, 6210 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 6211 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 6212 llvm::SmallVector<llvm::Value *, 16> RealArgs; 6213 RealArgs.append(std::begin(Args), std::end(Args)); 6214 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 6215 6216 auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 6217 CGF.EmitRuntimeCall(RTLFn, RealArgs); 6218 } 6219 6220 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 6221 const Expr *NumTeams, 6222 const Expr *ThreadLimit, 6223 SourceLocation Loc) { 6224 if (!CGF.HaveInsertPoint()) 6225 return; 6226 6227 auto *RTLoc = emitUpdateLocation(CGF, Loc); 6228 6229 llvm::Value *NumTeamsVal = 6230 (NumTeams) 6231 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 6232 CGF.CGM.Int32Ty, /* isSigned = */ true) 6233 : CGF.Builder.getInt32(0); 6234 6235 llvm::Value *ThreadLimitVal = 6236 (ThreadLimit) 6237 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 6238 CGF.CGM.Int32Ty, /* isSigned = */ true) 6239 : CGF.Builder.getInt32(0); 6240 6241 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 6242 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 6243 ThreadLimitVal}; 6244 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 6245 PushNumTeamsArgs); 6246 } 6247 6248 void CGOpenMPRuntime::emitTargetDataCalls( 6249 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 6250 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 6251 if (!CGF.HaveInsertPoint()) 6252 return; 6253 6254 // Action used to replace the default codegen action and turn privatization 6255 // off. 6256 PrePostActionTy NoPrivAction; 6257 6258 // Generate the code for the opening of the data environment. Capture all the 6259 // arguments of the runtime call by reference because they are used in the 6260 // closing of the region. 6261 auto &&BeginThenGen = [&D, &CGF, Device, &Info, &CodeGen, &NoPrivAction]( 6262 CodeGenFunction &CGF, PrePostActionTy &) { 6263 // Fill up the arrays with all the mapped variables. 6264 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 6265 MappableExprsHandler::MapValuesArrayTy Pointers; 6266 MappableExprsHandler::MapValuesArrayTy Sizes; 6267 MappableExprsHandler::MapFlagsArrayTy MapTypes; 6268 6269 // Get map clause information. 6270 MappableExprsHandler MCHandler(D, CGF); 6271 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 6272 6273 // Fill up the arrays and create the arguments. 
6274 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 6275 6276 llvm::Value *BasePointersArrayArg = nullptr; 6277 llvm::Value *PointersArrayArg = nullptr; 6278 llvm::Value *SizesArrayArg = nullptr; 6279 llvm::Value *MapTypesArrayArg = nullptr; 6280 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 6281 SizesArrayArg, MapTypesArrayArg, Info); 6282 6283 // Emit device ID if any. 6284 llvm::Value *DeviceID = nullptr; 6285 if (Device) 6286 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 6287 CGF.Int32Ty, /*isSigned=*/true); 6288 else 6289 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 6290 6291 // Emit the number of elements in the offloading arrays. 6292 auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 6293 6294 llvm::Value *OffloadingArgs[] = { 6295 DeviceID, PointerNum, BasePointersArrayArg, 6296 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 6297 auto &RT = CGF.CGM.getOpenMPRuntime(); 6298 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin), 6299 OffloadingArgs); 6300 6301 // If device pointer privatization is required, emit the body of the region 6302 // here. It will have to be duplicated: with and without privatization. 6303 if (!Info.CaptureDeviceAddrMap.empty()) 6304 CodeGen(CGF); 6305 }; 6306 6307 // Generate code for the closing of the data region. 6308 auto &&EndThenGen = [&CGF, Device, &Info](CodeGenFunction &CGF, 6309 PrePostActionTy &) { 6310 assert(Info.isValid() && "Invalid data environment closing arguments."); 6311 6312 llvm::Value *BasePointersArrayArg = nullptr; 6313 llvm::Value *PointersArrayArg = nullptr; 6314 llvm::Value *SizesArrayArg = nullptr; 6315 llvm::Value *MapTypesArrayArg = nullptr; 6316 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 6317 SizesArrayArg, MapTypesArrayArg, Info); 6318 6319 // Emit device ID if any. 
6320 llvm::Value *DeviceID = nullptr; 6321 if (Device) 6322 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 6323 CGF.Int32Ty, /*isSigned=*/true); 6324 else 6325 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 6326 6327 // Emit the number of elements in the offloading arrays. 6328 auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 6329 6330 llvm::Value *OffloadingArgs[] = { 6331 DeviceID, PointerNum, BasePointersArrayArg, 6332 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 6333 auto &RT = CGF.CGM.getOpenMPRuntime(); 6334 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end), 6335 OffloadingArgs); 6336 }; 6337 6338 // If we need device pointer privatization, we need to emit the body of the 6339 // region with no privatization in the 'else' branch of the conditional. 6340 // Otherwise, we don't have to do anything. 6341 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 6342 PrePostActionTy &) { 6343 if (!Info.CaptureDeviceAddrMap.empty()) { 6344 CodeGen.setAction(NoPrivAction); 6345 CodeGen(CGF); 6346 } 6347 }; 6348 6349 // We don't have to do anything to close the region if the if clause evaluates 6350 // to false. 6351 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 6352 6353 if (IfCond) { 6354 emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 6355 } else { 6356 RegionCodeGenTy RCG(BeginThenGen); 6357 RCG(CGF); 6358 } 6359 6360 // If we don't require privatization of device pointers, we emit the body in 6361 // between the runtime calls. This avoids duplicating the body code. 
6362 if (Info.CaptureDeviceAddrMap.empty()) { 6363 CodeGen.setAction(NoPrivAction); 6364 CodeGen(CGF); 6365 } 6366 6367 if (IfCond) { 6368 emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen); 6369 } else { 6370 RegionCodeGenTy RCG(EndThenGen); 6371 RCG(CGF); 6372 } 6373 } 6374 6375 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 6376 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 6377 const Expr *Device) { 6378 if (!CGF.HaveInsertPoint()) 6379 return; 6380 6381 assert((isa<OMPTargetEnterDataDirective>(D) || 6382 isa<OMPTargetExitDataDirective>(D) || 6383 isa<OMPTargetUpdateDirective>(D)) && 6384 "Expecting either target enter, exit data, or update directives."); 6385 6386 // Generate the code for the opening of the data environment. 6387 auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) { 6388 // Fill up the arrays with all the mapped variables. 6389 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 6390 MappableExprsHandler::MapValuesArrayTy Pointers; 6391 MappableExprsHandler::MapValuesArrayTy Sizes; 6392 MappableExprsHandler::MapFlagsArrayTy MapTypes; 6393 6394 // Get map clause information. 6395 MappableExprsHandler MEHandler(D, CGF); 6396 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 6397 6398 // Fill up the arrays and create the arguments. 6399 TargetDataInfo Info; 6400 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 6401 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 6402 Info.PointersArray, Info.SizesArray, 6403 Info.MapTypesArray, Info); 6404 6405 // Emit device ID if any. 6406 llvm::Value *DeviceID = nullptr; 6407 if (Device) 6408 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 6409 CGF.Int32Ty, /*isSigned=*/true); 6410 else 6411 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 6412 6413 // Emit the number of elements in the offloading arrays. 
6414 auto *PointerNum = CGF.Builder.getInt32(BasePointers.size()); 6415 6416 llvm::Value *OffloadingArgs[] = { 6417 DeviceID, PointerNum, Info.BasePointersArray, 6418 Info.PointersArray, Info.SizesArray, Info.MapTypesArray}; 6419 6420 auto &RT = CGF.CGM.getOpenMPRuntime(); 6421 // Select the right runtime function call for each expected standalone 6422 // directive. 6423 OpenMPRTLFunction RTLFn; 6424 switch (D.getDirectiveKind()) { 6425 default: 6426 llvm_unreachable("Unexpected standalone target data directive."); 6427 break; 6428 case OMPD_target_enter_data: 6429 RTLFn = OMPRTL__tgt_target_data_begin; 6430 break; 6431 case OMPD_target_exit_data: 6432 RTLFn = OMPRTL__tgt_target_data_end; 6433 break; 6434 case OMPD_target_update: 6435 RTLFn = OMPRTL__tgt_target_data_update; 6436 break; 6437 } 6438 CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs); 6439 }; 6440 6441 // In the event we get an if clause, we don't have to take any action on the 6442 // else side. 6443 auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 6444 6445 if (IfCond) { 6446 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 6447 } else { 6448 RegionCodeGenTy ThenGenRCG(ThenGen); 6449 ThenGenRCG(CGF); 6450 } 6451 } 6452 6453 namespace { 6454 /// Kind of parameter in a function with 'declare simd' directive. 6455 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 6456 /// Attribute set of the parameter. 6457 struct ParamAttrTy { 6458 ParamKindTy Kind = Vector; 6459 llvm::APSInt StrideOrArg; 6460 llvm::APSInt Alignment; 6461 }; 6462 } // namespace 6463 6464 static unsigned evaluateCDTSize(const FunctionDecl *FD, 6465 ArrayRef<ParamAttrTy> ParamAttrs) { 6466 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 6467 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 6468 // of that clause. The VLEN value must be power of 2. 
6469 // In other case the notion of the function`s "characteristic data type" (CDT) 6470 // is used to compute the vector length. 6471 // CDT is defined in the following order: 6472 // a) For non-void function, the CDT is the return type. 6473 // b) If the function has any non-uniform, non-linear parameters, then the 6474 // CDT is the type of the first such parameter. 6475 // c) If the CDT determined by a) or b) above is struct, union, or class 6476 // type which is pass-by-value (except for the type that maps to the 6477 // built-in complex data type), the characteristic data type is int. 6478 // d) If none of the above three cases is applicable, the CDT is int. 6479 // The VLEN is then determined based on the CDT and the size of vector 6480 // register of that ISA for which current vector version is generated. The 6481 // VLEN is computed using the formula below: 6482 // VLEN = sizeof(vector_register) / sizeof(CDT), 6483 // where vector register size specified in section 3.2.1 Registers and the 6484 // Stack Frame of original AMD64 ABI document. 
6485 QualType RetType = FD->getReturnType(); 6486 if (RetType.isNull()) 6487 return 0; 6488 ASTContext &C = FD->getASTContext(); 6489 QualType CDT; 6490 if (!RetType.isNull() && !RetType->isVoidType()) 6491 CDT = RetType; 6492 else { 6493 unsigned Offset = 0; 6494 if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 6495 if (ParamAttrs[Offset].Kind == Vector) 6496 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 6497 ++Offset; 6498 } 6499 if (CDT.isNull()) { 6500 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 6501 if (ParamAttrs[I + Offset].Kind == Vector) { 6502 CDT = FD->getParamDecl(I)->getType(); 6503 break; 6504 } 6505 } 6506 } 6507 } 6508 if (CDT.isNull()) 6509 CDT = C.IntTy; 6510 CDT = CDT->getCanonicalTypeUnqualified(); 6511 if (CDT->isRecordType() || CDT->isUnionType()) 6512 CDT = C.IntTy; 6513 return C.getTypeSize(CDT); 6514 } 6515 6516 static void 6517 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 6518 const llvm::APSInt &VLENVal, 6519 ArrayRef<ParamAttrTy> ParamAttrs, 6520 OMPDeclareSimdDeclAttr::BranchStateTy State) { 6521 struct ISADataTy { 6522 char ISA; 6523 unsigned VecRegSize; 6524 }; 6525 ISADataTy ISAData[] = { 6526 { 6527 'b', 128 6528 }, // SSE 6529 { 6530 'c', 256 6531 }, // AVX 6532 { 6533 'd', 256 6534 }, // AVX2 6535 { 6536 'e', 512 6537 }, // AVX512 6538 }; 6539 llvm::SmallVector<char, 2> Masked; 6540 switch (State) { 6541 case OMPDeclareSimdDeclAttr::BS_Undefined: 6542 Masked.push_back('N'); 6543 Masked.push_back('M'); 6544 break; 6545 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 6546 Masked.push_back('N'); 6547 break; 6548 case OMPDeclareSimdDeclAttr::BS_Inbranch: 6549 Masked.push_back('M'); 6550 break; 6551 } 6552 for (auto Mask : Masked) { 6553 for (auto &Data : ISAData) { 6554 SmallString<256> Buffer; 6555 llvm::raw_svector_ostream Out(Buffer); 6556 Out << "_ZGV" << Data.ISA << Mask; 6557 if (!VLENVal) { 6558 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / 6559 evaluateCDTSize(FD, 
ParamAttrs)); 6560 } else 6561 Out << VLENVal; 6562 for (auto &ParamAttr : ParamAttrs) { 6563 switch (ParamAttr.Kind){ 6564 case LinearWithVarStride: 6565 Out << 's' << ParamAttr.StrideOrArg; 6566 break; 6567 case Linear: 6568 Out << 'l'; 6569 if (!!ParamAttr.StrideOrArg) 6570 Out << ParamAttr.StrideOrArg; 6571 break; 6572 case Uniform: 6573 Out << 'u'; 6574 break; 6575 case Vector: 6576 Out << 'v'; 6577 break; 6578 } 6579 if (!!ParamAttr.Alignment) 6580 Out << 'a' << ParamAttr.Alignment; 6581 } 6582 Out << '_' << Fn->getName(); 6583 Fn->addFnAttr(Out.str()); 6584 } 6585 } 6586 } 6587 6588 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 6589 llvm::Function *Fn) { 6590 ASTContext &C = CGM.getContext(); 6591 FD = FD->getCanonicalDecl(); 6592 // Map params to their positions in function decl. 6593 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 6594 if (isa<CXXMethodDecl>(FD)) 6595 ParamPositions.insert({FD, 0}); 6596 unsigned ParamPos = ParamPositions.size(); 6597 for (auto *P : FD->parameters()) { 6598 ParamPositions.insert({P->getCanonicalDecl(), ParamPos}); 6599 ++ParamPos; 6600 } 6601 for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 6602 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 6603 // Mark uniform parameters. 6604 for (auto *E : Attr->uniforms()) { 6605 E = E->IgnoreParenImpCasts(); 6606 unsigned Pos; 6607 if (isa<CXXThisExpr>(E)) 6608 Pos = ParamPositions[FD]; 6609 else { 6610 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 6611 ->getCanonicalDecl(); 6612 Pos = ParamPositions[PVD]; 6613 } 6614 ParamAttrs[Pos].Kind = Uniform; 6615 } 6616 // Get alignment info. 
6617 auto NI = Attr->alignments_begin(); 6618 for (auto *E : Attr->aligneds()) { 6619 E = E->IgnoreParenImpCasts(); 6620 unsigned Pos; 6621 QualType ParmTy; 6622 if (isa<CXXThisExpr>(E)) { 6623 Pos = ParamPositions[FD]; 6624 ParmTy = E->getType(); 6625 } else { 6626 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 6627 ->getCanonicalDecl(); 6628 Pos = ParamPositions[PVD]; 6629 ParmTy = PVD->getType(); 6630 } 6631 ParamAttrs[Pos].Alignment = 6632 (*NI) ? (*NI)->EvaluateKnownConstInt(C) 6633 : llvm::APSInt::getUnsigned( 6634 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 6635 .getQuantity()); 6636 ++NI; 6637 } 6638 // Mark linear parameters. 6639 auto SI = Attr->steps_begin(); 6640 auto MI = Attr->modifiers_begin(); 6641 for (auto *E : Attr->linears()) { 6642 E = E->IgnoreParenImpCasts(); 6643 unsigned Pos; 6644 if (isa<CXXThisExpr>(E)) 6645 Pos = ParamPositions[FD]; 6646 else { 6647 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 6648 ->getCanonicalDecl(); 6649 Pos = ParamPositions[PVD]; 6650 } 6651 auto &ParamAttr = ParamAttrs[Pos]; 6652 ParamAttr.Kind = Linear; 6653 if (*SI) { 6654 if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C, 6655 Expr::SE_AllowSideEffects)) { 6656 if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 6657 if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 6658 ParamAttr.Kind = LinearWithVarStride; 6659 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 6660 ParamPositions[StridePVD->getCanonicalDecl()]); 6661 } 6662 } 6663 } 6664 } 6665 ++SI; 6666 ++MI; 6667 } 6668 llvm::APSInt VLENVal; 6669 if (const Expr *VLEN = Attr->getSimdlen()) 6670 VLENVal = VLEN->EvaluateKnownConstInt(C); 6671 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 6672 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 6673 CGM.getTriple().getArch() == llvm::Triple::x86_64) 6674 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 6675 } 6676 } 6677 6678 namespace { 6679 /// 
Cleanup action for doacross support. 6680 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 6681 public: 6682 static const int DoacrossFinArgs = 2; 6683 6684 private: 6685 llvm::Value *RTLFn; 6686 llvm::Value *Args[DoacrossFinArgs]; 6687 6688 public: 6689 DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs) 6690 : RTLFn(RTLFn) { 6691 assert(CallArgs.size() == DoacrossFinArgs); 6692 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 6693 } 6694 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 6695 if (!CGF.HaveInsertPoint()) 6696 return; 6697 CGF.EmitRuntimeCall(RTLFn, Args); 6698 } 6699 }; 6700 } // namespace 6701 6702 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 6703 const OMPLoopDirective &D) { 6704 if (!CGF.HaveInsertPoint()) 6705 return; 6706 6707 ASTContext &C = CGM.getContext(); 6708 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 6709 RecordDecl *RD; 6710 if (KmpDimTy.isNull()) { 6711 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 6712 // kmp_int64 lo; // lower 6713 // kmp_int64 up; // upper 6714 // kmp_int64 st; // stride 6715 // }; 6716 RD = C.buildImplicitRecord("kmp_dim"); 6717 RD->startDefinition(); 6718 addFieldToRecordDecl(C, RD, Int64Ty); 6719 addFieldToRecordDecl(C, RD, Int64Ty); 6720 addFieldToRecordDecl(C, RD, Int64Ty); 6721 RD->completeDefinition(); 6722 KmpDimTy = C.getRecordType(RD); 6723 } else 6724 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 6725 6726 Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims"); 6727 CGF.EmitNullInitialization(DimsAddr, KmpDimTy); 6728 enum { LowerFD = 0, UpperFD, StrideFD }; 6729 // Fill dims with data. 
6730 LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy); 6731 // dims.upper = num_iterations; 6732 LValue UpperLVal = 6733 CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD)); 6734 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 6735 CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(), 6736 Int64Ty, D.getNumIterations()->getExprLoc()); 6737 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 6738 // dims.stride = 1; 6739 LValue StrideLVal = 6740 CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD)); 6741 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 6742 StrideLVal); 6743 6744 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 6745 // kmp_int32 num_dims, struct kmp_dim * dims); 6746 llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()), 6747 getThreadID(CGF, D.getLocStart()), 6748 llvm::ConstantInt::getSigned(CGM.Int32Ty, 1), 6749 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6750 DimsAddr.getPointer(), CGM.VoidPtrTy)}; 6751 6752 llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init); 6753 CGF.EmitRuntimeCall(RTLFn, Args); 6754 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 6755 emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())}; 6756 llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 6757 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 6758 llvm::makeArrayRef(FiniArgs)); 6759 } 6760 6761 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 6762 const OMPDependClause *C) { 6763 QualType Int64Ty = 6764 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 6765 const Expr *CounterVal = C->getCounterValue(); 6766 assert(CounterVal); 6767 llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal), 6768 CounterVal->getType(), Int64Ty, 6769 CounterVal->getExprLoc()); 6770 Address CntAddr = 
CGF.CreateMemTemp(Int64Ty, ".cnt.addr"); 6771 CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty); 6772 llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()), 6773 getThreadID(CGF, C->getLocStart()), 6774 CntAddr.getPointer()}; 6775 llvm::Value *RTLFn; 6776 if (C->getDependencyKind() == OMPC_DEPEND_source) 6777 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 6778 else { 6779 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 6780 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 6781 } 6782 CGF.EmitRuntimeCall(RTLFn, Args); 6783 } 6784 6785