1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This provides a class for OpenMP runtime code generation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGOpenMPRuntime.h" 17 #include "CodeGenFunction.h" 18 #include "clang/CodeGen/ConstantInitBuilder.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/StmtOpenMP.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/Bitcode/BitcodeReader.h" 23 #include "llvm/IR/CallSite.h" 24 #include "llvm/IR/DerivedTypes.h" 25 #include "llvm/IR/GlobalValue.h" 26 #include "llvm/IR/Value.h" 27 #include "llvm/Support/Format.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <cassert> 30 31 using namespace clang; 32 using namespace CodeGen; 33 34 namespace { 35 /// \brief Base class for handling code generation inside OpenMP regions. 36 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 37 public: 38 /// \brief Kinds of OpenMP regions used in codegen. 39 enum CGOpenMPRegionKind { 40 /// \brief Region with outlined function for standalone 'parallel' 41 /// directive. 42 ParallelOutlinedRegion, 43 /// \brief Region with outlined function for standalone 'task' directive. 44 TaskOutlinedRegion, 45 /// \brief Region for constructs that do not require function outlining, 46 /// like 'for', 'sections', 'atomic' etc. directives. 47 InlinedRegion, 48 /// \brief Region with outlined function for standalone 'target' directive. 
49 TargetRegion, 50 }; 51 52 CGOpenMPRegionInfo(const CapturedStmt &CS, 53 const CGOpenMPRegionKind RegionKind, 54 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 55 bool HasCancel) 56 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 57 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 58 59 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 60 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 61 bool HasCancel) 62 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 63 Kind(Kind), HasCancel(HasCancel) {} 64 65 /// \brief Get a variable or parameter for storing global thread id 66 /// inside OpenMP construct. 67 virtual const VarDecl *getThreadIDVariable() const = 0; 68 69 /// \brief Emit the captured statement body. 70 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 71 72 /// \brief Get an LValue for the current ThreadID variable. 73 /// \return LValue for thread id variable. This LValue always has type int32*. 74 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 75 76 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 77 78 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 79 80 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 81 82 bool hasCancel() const { return HasCancel; } 83 84 static bool classof(const CGCapturedStmtInfo *Info) { 85 return Info->getKind() == CR_OpenMP; 86 } 87 88 ~CGOpenMPRegionInfo() override = default; 89 90 protected: 91 CGOpenMPRegionKind RegionKind; 92 RegionCodeGenTy CodeGen; 93 OpenMPDirectiveKind Kind; 94 bool HasCancel; 95 }; 96 97 /// \brief API for captured statement code generation in OpenMP constructs. 
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 99 public: 100 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 101 const RegionCodeGenTy &CodeGen, 102 OpenMPDirectiveKind Kind, bool HasCancel, 103 StringRef HelperName) 104 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 105 HasCancel), 106 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 107 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 108 } 109 110 /// \brief Get a variable or parameter for storing global thread id 111 /// inside OpenMP construct. 112 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 113 114 /// \brief Get the name of the capture helper. 115 StringRef getHelperName() const override { return HelperName; } 116 117 static bool classof(const CGCapturedStmtInfo *Info) { 118 return CGOpenMPRegionInfo::classof(Info) && 119 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 120 ParallelOutlinedRegion; 121 } 122 123 private: 124 /// \brief A variable or parameter storing global thread id for OpenMP 125 /// constructs. 126 const VarDecl *ThreadIDVar; 127 StringRef HelperName; 128 }; 129 130 /// \brief API for captured statement code generation in OpenMP constructs. 131 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 132 public: 133 class UntiedTaskActionTy final : public PrePostActionTy { 134 bool Untied; 135 const VarDecl *PartIDVar; 136 const RegionCodeGenTy UntiedCodeGen; 137 llvm::SwitchInst *UntiedSwitch = nullptr; 138 139 public: 140 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 141 const RegionCodeGenTy &UntiedCodeGen) 142 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 143 void Enter(CodeGenFunction &CGF) override { 144 if (Untied) { 145 // Emit task switching point. 
146 auto PartIdLVal = CGF.EmitLoadOfPointerLValue( 147 CGF.GetAddrOfLocalVar(PartIDVar), 148 PartIDVar->getType()->castAs<PointerType>()); 149 auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation()); 150 auto *DoneBB = CGF.createBasicBlock(".untied.done."); 151 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 152 CGF.EmitBlock(DoneBB); 153 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 154 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 155 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 156 CGF.Builder.GetInsertBlock()); 157 emitUntiedSwitch(CGF); 158 } 159 } 160 void emitUntiedSwitch(CodeGenFunction &CGF) const { 161 if (Untied) { 162 auto PartIdLVal = CGF.EmitLoadOfPointerLValue( 163 CGF.GetAddrOfLocalVar(PartIDVar), 164 PartIDVar->getType()->castAs<PointerType>()); 165 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 166 PartIdLVal); 167 UntiedCodeGen(CGF); 168 CodeGenFunction::JumpDest CurPoint = 169 CGF.getJumpDestInCurrentScope(".untied.next."); 170 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 171 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 172 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 173 CGF.Builder.GetInsertBlock()); 174 CGF.EmitBranchThroughCleanup(CurPoint); 175 CGF.EmitBlock(CurPoint.getBlock()); 176 } 177 } 178 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } 179 }; 180 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 181 const VarDecl *ThreadIDVar, 182 const RegionCodeGenTy &CodeGen, 183 OpenMPDirectiveKind Kind, bool HasCancel, 184 const UntiedTaskActionTy &Action) 185 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 186 ThreadIDVar(ThreadIDVar), Action(Action) { 187 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 188 } 189 190 /// \brief Get a variable or parameter for storing global thread id 191 /// inside OpenMP construct. 
192 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 193 194 /// \brief Get an LValue for the current ThreadID variable. 195 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 196 197 /// \brief Get the name of the capture helper. 198 StringRef getHelperName() const override { return ".omp_outlined."; } 199 200 void emitUntiedSwitch(CodeGenFunction &CGF) override { 201 Action.emitUntiedSwitch(CGF); 202 } 203 204 static bool classof(const CGCapturedStmtInfo *Info) { 205 return CGOpenMPRegionInfo::classof(Info) && 206 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 207 TaskOutlinedRegion; 208 } 209 210 private: 211 /// \brief A variable or parameter storing global thread id for OpenMP 212 /// constructs. 213 const VarDecl *ThreadIDVar; 214 /// Action for emitting code for untied tasks. 215 const UntiedTaskActionTy &Action; 216 }; 217 218 /// \brief API for inlined captured statement code generation in OpenMP 219 /// constructs. 220 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 221 public: 222 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 223 const RegionCodeGenTy &CodeGen, 224 OpenMPDirectiveKind Kind, bool HasCancel) 225 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 226 OldCSI(OldCSI), 227 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 228 229 // \brief Retrieve the value of the context parameter. 230 llvm::Value *getContextValue() const override { 231 if (OuterRegionInfo) 232 return OuterRegionInfo->getContextValue(); 233 llvm_unreachable("No context value for inlined OpenMP region"); 234 } 235 236 void setContextValue(llvm::Value *V) override { 237 if (OuterRegionInfo) { 238 OuterRegionInfo->setContextValue(V); 239 return; 240 } 241 llvm_unreachable("No context value for inlined OpenMP region"); 242 } 243 244 /// \brief Lookup the captured field decl for a variable. 
245 const FieldDecl *lookup(const VarDecl *VD) const override { 246 if (OuterRegionInfo) 247 return OuterRegionInfo->lookup(VD); 248 // If there is no outer outlined region,no need to lookup in a list of 249 // captured variables, we can use the original one. 250 return nullptr; 251 } 252 253 FieldDecl *getThisFieldDecl() const override { 254 if (OuterRegionInfo) 255 return OuterRegionInfo->getThisFieldDecl(); 256 return nullptr; 257 } 258 259 /// \brief Get a variable or parameter for storing global thread id 260 /// inside OpenMP construct. 261 const VarDecl *getThreadIDVariable() const override { 262 if (OuterRegionInfo) 263 return OuterRegionInfo->getThreadIDVariable(); 264 return nullptr; 265 } 266 267 /// \brief Get the name of the capture helper. 268 StringRef getHelperName() const override { 269 if (auto *OuterRegionInfo = getOldCSI()) 270 return OuterRegionInfo->getHelperName(); 271 llvm_unreachable("No helper name for inlined OpenMP construct"); 272 } 273 274 void emitUntiedSwitch(CodeGenFunction &CGF) override { 275 if (OuterRegionInfo) 276 OuterRegionInfo->emitUntiedSwitch(CGF); 277 } 278 279 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 280 281 static bool classof(const CGCapturedStmtInfo *Info) { 282 return CGOpenMPRegionInfo::classof(Info) && 283 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 284 } 285 286 ~CGOpenMPInlinedRegionInfo() override = default; 287 288 private: 289 /// \brief CodeGen info about outer OpenMP region. 290 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 291 CGOpenMPRegionInfo *OuterRegionInfo; 292 }; 293 294 /// \brief API for captured statement code generation in OpenMP target 295 /// constructs. For this captures, implicit parameters are used instead of the 296 /// captured fields. The name of the target region has to be unique in a given 297 /// application so it is provided by the client, because only the client has 298 /// the information to generate that. 
299 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 300 public: 301 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 302 const RegionCodeGenTy &CodeGen, StringRef HelperName) 303 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 304 /*HasCancel=*/false), 305 HelperName(HelperName) {} 306 307 /// \brief This is unused for target regions because each starts executing 308 /// with a single thread. 309 const VarDecl *getThreadIDVariable() const override { return nullptr; } 310 311 /// \brief Get the name of the capture helper. 312 StringRef getHelperName() const override { return HelperName; } 313 314 static bool classof(const CGCapturedStmtInfo *Info) { 315 return CGOpenMPRegionInfo::classof(Info) && 316 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 317 } 318 319 private: 320 StringRef HelperName; 321 }; 322 323 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 324 llvm_unreachable("No codegen for expressions"); 325 } 326 /// \brief API for generation of expressions captured in a innermost OpenMP 327 /// region. 328 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 329 public: 330 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 331 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 332 OMPD_unknown, 333 /*HasCancel=*/false), 334 PrivScope(CGF) { 335 // Make sure the globals captured in the provided statement are local by 336 // using the privatization logic. We assume the same variable is not 337 // captured more than once. 
338 for (auto &C : CS.captures()) { 339 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 340 continue; 341 342 const VarDecl *VD = C.getCapturedVar(); 343 if (VD->isLocalVarDeclOrParm()) 344 continue; 345 346 DeclRefExpr DRE(const_cast<VarDecl *>(VD), 347 /*RefersToEnclosingVariableOrCapture=*/false, 348 VD->getType().getNonReferenceType(), VK_LValue, 349 SourceLocation()); 350 PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address { 351 return CGF.EmitLValue(&DRE).getAddress(); 352 }); 353 } 354 (void)PrivScope.Privatize(); 355 } 356 357 /// \brief Lookup the captured field decl for a variable. 358 const FieldDecl *lookup(const VarDecl *VD) const override { 359 if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 360 return FD; 361 return nullptr; 362 } 363 364 /// \brief Emit the captured statement body. 365 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 366 llvm_unreachable("No body for expressions"); 367 } 368 369 /// \brief Get a variable or parameter for storing global thread id 370 /// inside OpenMP construct. 371 const VarDecl *getThreadIDVariable() const override { 372 llvm_unreachable("No thread id for expressions"); 373 } 374 375 /// \brief Get the name of the capture helper. 376 StringRef getHelperName() const override { 377 llvm_unreachable("No helper name for expressions"); 378 } 379 380 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 381 382 private: 383 /// Private scope to capture global variables. 384 CodeGenFunction::OMPPrivateScope PrivScope; 385 }; 386 387 /// \brief RAII for emitting code of OpenMP constructs. 388 class InlinedOpenMPRegionRAII { 389 CodeGenFunction &CGF; 390 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 391 FieldDecl *LambdaThisCaptureField = nullptr; 392 393 public: 394 /// \brief Constructs region for combined constructs. 395 /// \param CodeGen Code generation sequence for combined directives. 
Includes 396 /// a list of functions used for code generation of implicitly inlined 397 /// regions. 398 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 399 OpenMPDirectiveKind Kind, bool HasCancel) 400 : CGF(CGF) { 401 // Start emission for the construct. 402 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 403 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 404 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 405 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 406 CGF.LambdaThisCaptureField = nullptr; 407 } 408 409 ~InlinedOpenMPRegionRAII() { 410 // Restore original CapturedStmtInfo only if we're done with code emission. 411 auto *OldCSI = 412 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 413 delete CGF.CapturedStmtInfo; 414 CGF.CapturedStmtInfo = OldCSI; 415 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 416 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 417 } 418 }; 419 420 /// \brief Values for bit flags used in the ident_t to describe the fields. 421 /// All enumeric elements are named and described in accordance with the code 422 /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h 423 enum OpenMPLocationFlags { 424 /// \brief Use trampoline for internal microtask. 425 OMP_IDENT_IMD = 0x01, 426 /// \brief Use c-style ident structure. 427 OMP_IDENT_KMPC = 0x02, 428 /// \brief Atomic reduction option for kmpc_reduce. 429 OMP_ATOMIC_REDUCE = 0x10, 430 /// \brief Explicit 'barrier' directive. 431 OMP_IDENT_BARRIER_EXPL = 0x20, 432 /// \brief Implicit barrier in code. 433 OMP_IDENT_BARRIER_IMPL = 0x40, 434 /// \brief Implicit barrier in 'for' directive. 435 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 436 /// \brief Implicit barrier in 'sections' directive. 437 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 438 /// \brief Implicit barrier in 'single' directive. 
439 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140 440 }; 441 442 /// \brief Describes ident structure that describes a source location. 443 /// All descriptions are taken from 444 /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h 445 /// Original structure: 446 /// typedef struct ident { 447 /// kmp_int32 reserved_1; /**< might be used in Fortran; 448 /// see above */ 449 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 450 /// KMP_IDENT_KMPC identifies this union 451 /// member */ 452 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 453 /// see above */ 454 ///#if USE_ITT_BUILD 455 /// /* but currently used for storing 456 /// region-specific ITT */ 457 /// /* contextual information. */ 458 ///#endif /* USE_ITT_BUILD */ 459 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 460 /// C++ */ 461 /// char const *psource; /**< String describing the source location. 462 /// The string is composed of semi-colon separated 463 // fields which describe the source file, 464 /// the function and a pair of line numbers that 465 /// delimit the construct. 466 /// */ 467 /// } ident_t; 468 enum IdentFieldIndex { 469 /// \brief might be used in Fortran 470 IdentField_Reserved_1, 471 /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 472 IdentField_Flags, 473 /// \brief Not really used in Fortran any more 474 IdentField_Reserved_2, 475 /// \brief Source[4] in Fortran, do not use for C++ 476 IdentField_Reserved_3, 477 /// \brief String describing the source location. The string is composed of 478 /// semi-colon separated fields which describe the source file, the function 479 /// and a pair of line numbers that delimit the construct. 480 IdentField_PSource 481 }; 482 483 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from 484 /// the enum sched_type in kmp.h). 485 enum OpenMPSchedType { 486 /// \brief Lower bound for default (unordered) versions. 
487 OMP_sch_lower = 32, 488 OMP_sch_static_chunked = 33, 489 OMP_sch_static = 34, 490 OMP_sch_dynamic_chunked = 35, 491 OMP_sch_guided_chunked = 36, 492 OMP_sch_runtime = 37, 493 OMP_sch_auto = 38, 494 /// static with chunk adjustment (e.g., simd) 495 OMP_sch_static_balanced_chunked = 45, 496 /// \brief Lower bound for 'ordered' versions. 497 OMP_ord_lower = 64, 498 OMP_ord_static_chunked = 65, 499 OMP_ord_static = 66, 500 OMP_ord_dynamic_chunked = 67, 501 OMP_ord_guided_chunked = 68, 502 OMP_ord_runtime = 69, 503 OMP_ord_auto = 70, 504 OMP_sch_default = OMP_sch_static, 505 /// \brief dist_schedule types 506 OMP_dist_sch_static_chunked = 91, 507 OMP_dist_sch_static = 92, 508 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 509 /// Set if the monotonic schedule modifier was present. 510 OMP_sch_modifier_monotonic = (1 << 29), 511 /// Set if the nonmonotonic schedule modifier was present. 512 OMP_sch_modifier_nonmonotonic = (1 << 30), 513 }; 514 515 enum OpenMPRTLFunction { 516 /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, 517 /// kmpc_micro microtask, ...); 518 OMPRTL__kmpc_fork_call, 519 /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc, 520 /// kmp_int32 global_tid, void *data, size_t size, void ***cache); 521 OMPRTL__kmpc_threadprivate_cached, 522 /// \brief Call to void __kmpc_threadprivate_register( ident_t *, 523 /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 524 OMPRTL__kmpc_threadprivate_register, 525 // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); 526 OMPRTL__kmpc_global_thread_num, 527 // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 528 // kmp_critical_name *crit); 529 OMPRTL__kmpc_critical, 530 // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 531 // global_tid, kmp_critical_name *crit, uintptr_t hint); 532 OMPRTL__kmpc_critical_with_hint, 533 // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 534 // 
kmp_critical_name *crit); 535 OMPRTL__kmpc_end_critical, 536 // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 537 // global_tid); 538 OMPRTL__kmpc_cancel_barrier, 539 // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 540 OMPRTL__kmpc_barrier, 541 // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 542 OMPRTL__kmpc_for_static_fini, 543 // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 544 // global_tid); 545 OMPRTL__kmpc_serialized_parallel, 546 // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 547 // global_tid); 548 OMPRTL__kmpc_end_serialized_parallel, 549 // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 550 // kmp_int32 num_threads); 551 OMPRTL__kmpc_push_num_threads, 552 // Call to void __kmpc_flush(ident_t *loc); 553 OMPRTL__kmpc_flush, 554 // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid); 555 OMPRTL__kmpc_master, 556 // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); 557 OMPRTL__kmpc_end_master, 558 // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 559 // int end_part); 560 OMPRTL__kmpc_omp_taskyield, 561 // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); 562 OMPRTL__kmpc_single, 563 // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); 564 OMPRTL__kmpc_end_single, 565 // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 566 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 567 // kmp_routine_entry_t *task_entry); 568 OMPRTL__kmpc_omp_task_alloc, 569 // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * 570 // new_task); 571 OMPRTL__kmpc_omp_task, 572 // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 573 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 574 // kmp_int32 didit); 575 OMPRTL__kmpc_copyprivate, 576 // Call to kmp_int32 
__kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 577 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 578 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 579 OMPRTL__kmpc_reduce, 580 // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 581 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 582 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 583 // *lck); 584 OMPRTL__kmpc_reduce_nowait, 585 // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 586 // kmp_critical_name *lck); 587 OMPRTL__kmpc_end_reduce, 588 // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 589 // kmp_critical_name *lck); 590 OMPRTL__kmpc_end_reduce_nowait, 591 // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 592 // kmp_task_t * new_task); 593 OMPRTL__kmpc_omp_task_begin_if0, 594 // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 595 // kmp_task_t * new_task); 596 OMPRTL__kmpc_omp_task_complete_if0, 597 // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 598 OMPRTL__kmpc_ordered, 599 // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 600 OMPRTL__kmpc_end_ordered, 601 // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 602 // global_tid); 603 OMPRTL__kmpc_omp_taskwait, 604 // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 605 OMPRTL__kmpc_taskgroup, 606 // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 607 OMPRTL__kmpc_end_taskgroup, 608 // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 609 // int proc_bind); 610 OMPRTL__kmpc_push_proc_bind, 611 // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 612 // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t 613 // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 614 
OMPRTL__kmpc_omp_task_with_deps, 615 // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 616 // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 617 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 618 OMPRTL__kmpc_omp_wait_deps, 619 // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 620 // global_tid, kmp_int32 cncl_kind); 621 OMPRTL__kmpc_cancellationpoint, 622 // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 623 // kmp_int32 cncl_kind); 624 OMPRTL__kmpc_cancel, 625 // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 626 // kmp_int32 num_teams, kmp_int32 thread_limit); 627 OMPRTL__kmpc_push_num_teams, 628 // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 629 // microtask, ...); 630 OMPRTL__kmpc_fork_teams, 631 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 632 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 633 // sched, kmp_uint64 grainsize, void *task_dup); 634 OMPRTL__kmpc_taskloop, 635 // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 636 // num_dims, struct kmp_dim *dims); 637 OMPRTL__kmpc_doacross_init, 638 // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 639 OMPRTL__kmpc_doacross_fini, 640 // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 641 // *vec); 642 OMPRTL__kmpc_doacross_post, 643 // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 644 // *vec); 645 OMPRTL__kmpc_doacross_wait, 646 647 // 648 // Offloading related calls 649 // 650 // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t 651 // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t 652 // *arg_types); 653 OMPRTL__tgt_target, 654 // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, 655 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 656 // 
int32_t *arg_types, int32_t num_teams, int32_t thread_limit); 657 OMPRTL__tgt_target_teams, 658 // Call to void __tgt_register_lib(__tgt_bin_desc *desc); 659 OMPRTL__tgt_register_lib, 660 // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); 661 OMPRTL__tgt_unregister_lib, 662 // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, 663 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 664 OMPRTL__tgt_target_data_begin, 665 // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num, 666 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 667 OMPRTL__tgt_target_data_end, 668 // Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num, 669 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 670 OMPRTL__tgt_target_data_update, 671 }; 672 673 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 674 /// region. 675 class CleanupTy final : public EHScopeStack::Cleanup { 676 PrePostActionTy *Action; 677 678 public: 679 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 680 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 681 if (!CGF.HaveInsertPoint()) 682 return; 683 Action->Exit(CGF); 684 } 685 }; 686 687 } // anonymous namespace 688 689 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 690 CodeGenFunction::RunCleanupsScope Scope(CGF); 691 if (PrePostAction) { 692 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 693 Callback(CodeGen, CGF, *PrePostAction); 694 } else { 695 PrePostActionTy Action; 696 Callback(CodeGen, CGF, Action); 697 } 698 } 699 700 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 701 return CGF.EmitLoadOfPointerLValue( 702 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 703 getThreadIDVariable()->getType()->castAs<PointerType>()); 704 } 705 706 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 
707 if (!CGF.HaveInsertPoint()) 708 return; 709 // 1.2.2 OpenMP Language Terminology 710 // Structured block - An executable statement with a single entry at the 711 // top and a single exit at the bottom. 712 // The point of exit cannot be a branch out of the structured block. 713 // longjmp() and throw() must not violate the entry/exit criteria. 714 CGF.EHStack.pushTerminate(); 715 CodeGen(CGF); 716 CGF.EHStack.popTerminate(); 717 } 718 719 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 720 CodeGenFunction &CGF) { 721 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 722 getThreadIDVariable()->getType(), 723 LValueBaseInfo(AlignmentSource::Decl, false)); 724 } 725 726 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) 727 : CGM(CGM), OffloadEntriesInfoManager(CGM) { 728 IdentTy = llvm::StructType::create( 729 "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, 730 CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, 731 CGM.Int8PtrTy /* psource */); 732 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 733 734 loadOffloadInfoMetadata(); 735 } 736 737 void CGOpenMPRuntime::clear() { 738 InternalVars.clear(); 739 } 740 741 static llvm::Function * 742 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 743 const Expr *CombinerInitializer, const VarDecl *In, 744 const VarDecl *Out, bool IsCombiner) { 745 // void .omp_combiner.(Ty *in, Ty *out); 746 auto &C = CGM.getContext(); 747 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 748 FunctionArgList Args; 749 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 750 /*Id=*/nullptr, PtrTy); 751 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 752 /*Id=*/nullptr, PtrTy); 753 Args.push_back(&OmpOutParm); 754 Args.push_back(&OmpInParm); 755 auto &FnInfo = 756 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 757 auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 758 auto *Fn = 
llvm::Function::Create( 759 FnTy, llvm::GlobalValue::InternalLinkage, 760 IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule()); 761 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); 762 Fn->removeFnAttr(llvm::Attribute::NoInline); 763 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 764 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 765 CodeGenFunction CGF(CGM); 766 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 767 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 768 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); 769 CodeGenFunction::OMPPrivateScope Scope(CGF); 770 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 771 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address { 772 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 773 .getAddress(); 774 }); 775 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 776 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address { 777 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 778 .getAddress(); 779 }); 780 (void)Scope.Privatize(); 781 CGF.EmitIgnoredExpr(CombinerInitializer); 782 Scope.ForceCleanup(); 783 CGF.FinishFunction(); 784 return Fn; 785 } 786 787 void CGOpenMPRuntime::emitUserDefinedReduction( 788 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 789 if (UDRMap.count(D) > 0) 790 return; 791 auto &C = CGM.getContext(); 792 if (!In || !Out) { 793 In = &C.Idents.get("omp_in"); 794 Out = &C.Idents.get("omp_out"); 795 } 796 llvm::Function *Combiner = emitCombinerOrInitializer( 797 CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()), 798 cast<VarDecl>(D->lookup(Out).front()), 799 /*IsCombiner=*/true); 800 llvm::Function *Initializer = nullptr; 801 if (auto *Init = D->getInitializer()) { 802 if (!Priv || !Orig) { 803 Priv = &C.Idents.get("omp_priv"); 804 Orig = &C.Idents.get("omp_orig"); 805 } 806 Initializer = emitCombinerOrInitializer( 807 
CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()), 808 cast<VarDecl>(D->lookup(Priv).front()), 809 /*IsCombiner=*/false); 810 } 811 UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer))); 812 if (CGF) { 813 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 814 Decls.second.push_back(D); 815 } 816 } 817 818 std::pair<llvm::Function *, llvm::Function *> 819 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 820 auto I = UDRMap.find(D); 821 if (I != UDRMap.end()) 822 return I->second; 823 emitUserDefinedReduction(/*CGF=*/nullptr, D); 824 return UDRMap.lookup(D); 825 } 826 827 // Layout information for ident_t. 828 static CharUnits getIdentAlign(CodeGenModule &CGM) { 829 return CGM.getPointerAlign(); 830 } 831 static CharUnits getIdentSize(CodeGenModule &CGM) { 832 assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign())); 833 return CharUnits::fromQuantity(16) + CGM.getPointerSize(); 834 } 835 static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) { 836 // All the fields except the last are i32, so this works beautifully. 
837 return unsigned(Field) * CharUnits::fromQuantity(4); 838 } 839 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr, 840 IdentFieldIndex Field, 841 const llvm::Twine &Name = "") { 842 auto Offset = getOffsetOfIdentField(Field); 843 return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name); 844 } 845 846 static llvm::Value *emitParallelOrTeamsOutlinedFunction( 847 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 848 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 849 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 850 assert(ThreadIDVar->getType()->isPointerType() && 851 "thread id variable must be of type kmp_int32 *"); 852 CodeGenFunction CGF(CGM, true); 853 bool HasCancel = false; 854 if (auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 855 HasCancel = OPD->hasCancel(); 856 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 857 HasCancel = OPSD->hasCancel(); 858 else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 859 HasCancel = OPFD->hasCancel(); 860 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 861 HasCancel, OutlinedHelperName); 862 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 863 return CGF.GenerateOpenMPCapturedStmtFunction(*CS); 864 } 865 866 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( 867 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 868 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 869 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 870 return emitParallelOrTeamsOutlinedFunction( 871 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 872 } 873 874 llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction( 875 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 876 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 877 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 878 return 
emitParallelOrTeamsOutlinedFunction( 879 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 880 } 881 882 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( 883 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 884 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 885 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 886 bool Tied, unsigned &NumberOfParts) { 887 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 888 PrePostActionTy &) { 889 auto *ThreadID = getThreadID(CGF, D.getLocStart()); 890 auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart()); 891 llvm::Value *TaskArgs[] = { 892 UpLoc, ThreadID, 893 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 894 TaskTVar->getType()->castAs<PointerType>()) 895 .getPointer()}; 896 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 897 }; 898 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 899 UntiedCodeGen); 900 CodeGen.setAction(Action); 901 assert(!ThreadIDVar->getType()->isPointerType() && 902 "thread id variable must be of type kmp_int32 for tasks"); 903 auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 904 auto *TD = dyn_cast<OMPTaskDirective>(&D); 905 CodeGenFunction CGF(CGM, true); 906 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 907 InnermostKind, 908 TD ? TD->hasCancel() : false, Action); 909 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 910 auto *Res = CGF.GenerateCapturedStmtFunction(*CS); 911 if (!Tied) 912 NumberOfParts = Action.getNumberOfParts(); 913 return Res; 914 } 915 916 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 917 CharUnits Align = getIdentAlign(CGM); 918 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); 919 if (!Entry) { 920 if (!DefaultOpenMPPSource) { 921 // Initialize default location for psource field of ident_t structure of 922 // all ident_t objects. 
Format is ";file;function;line;column;;". 923 // Taken from 924 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c 925 DefaultOpenMPPSource = 926 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 927 DefaultOpenMPPSource = 928 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 929 } 930 931 ConstantInitBuilder builder(CGM); 932 auto fields = builder.beginStruct(IdentTy); 933 fields.addInt(CGM.Int32Ty, 0); 934 fields.addInt(CGM.Int32Ty, Flags); 935 fields.addInt(CGM.Int32Ty, 0); 936 fields.addInt(CGM.Int32Ty, 0); 937 fields.add(DefaultOpenMPPSource); 938 auto DefaultOpenMPLocation = 939 fields.finishAndCreateGlobal("", Align, /*isConstant*/ true, 940 llvm::GlobalValue::PrivateLinkage); 941 DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 942 943 OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation; 944 } 945 return Address(Entry, Align); 946 } 947 948 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 949 SourceLocation Loc, 950 unsigned Flags) { 951 Flags |= OMP_IDENT_KMPC; 952 // If no debug info is generated - return global default location. 953 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 954 Loc.isInvalid()) 955 return getOrCreateDefaultLocation(Flags).getPointer(); 956 957 assert(CGF.CurFn && "No function in current CodeGenFunction."); 958 959 Address LocValue = Address::invalid(); 960 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 961 if (I != OpenMPLocThreadIDMap.end()) 962 LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM)); 963 964 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 965 // GetOpenMPThreadID was called before this routine. 
966 if (!LocValue.isValid()) { 967 // Generate "ident_t .kmpc_loc.addr;" 968 Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM), 969 ".kmpc_loc.addr"); 970 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 971 Elem.second.DebugLoc = AI.getPointer(); 972 LocValue = AI; 973 974 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 975 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 976 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 977 CGM.getSize(getIdentSize(CGF.CGM))); 978 } 979 980 // char **psource = &.kmpc_loc_<flags>.addr.psource; 981 Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource); 982 983 auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 984 if (OMPDebugLoc == nullptr) { 985 SmallString<128> Buffer2; 986 llvm::raw_svector_ostream OS2(Buffer2); 987 // Build debug location 988 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 989 OS2 << ";" << PLoc.getFilename() << ";"; 990 if (const FunctionDecl *FD = 991 dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) { 992 OS2 << FD->getQualifiedNameAsString(); 993 } 994 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 995 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 996 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 997 } 998 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 999 CGF.Builder.CreateStore(OMPDebugLoc, PSource); 1000 1001 // Our callers always pass this to a runtime function, so for 1002 // convenience, go ahead and return a naked pointer. 1003 return LocValue.getPointer(); 1004 } 1005 1006 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1007 SourceLocation Loc) { 1008 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1009 1010 llvm::Value *ThreadID = nullptr; 1011 // Check whether we've already cached a load of the thread id in this 1012 // function. 
1013 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1014 if (I != OpenMPLocThreadIDMap.end()) { 1015 ThreadID = I->second.ThreadID; 1016 if (ThreadID != nullptr) 1017 return ThreadID; 1018 } 1019 if (auto *OMPRegionInfo = 1020 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1021 if (OMPRegionInfo->getThreadIDVariable()) { 1022 // Check if this an outlined function with thread id passed as argument. 1023 auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1024 ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); 1025 // If value loaded in entry block, cache it and use it everywhere in 1026 // function. 1027 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 1028 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1029 Elem.second.ThreadID = ThreadID; 1030 } 1031 return ThreadID; 1032 } 1033 } 1034 1035 // This is not an outlined function region - need to call __kmpc_int32 1036 // kmpc_global_thread_num(ident_t *loc). 1037 // Generate thread id value and cache this value for use across the 1038 // function. 
1039 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1040 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 1041 ThreadID = 1042 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1043 emitUpdateLocation(CGF, Loc)); 1044 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1045 Elem.second.ThreadID = ThreadID; 1046 return ThreadID; 1047 } 1048 1049 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1050 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1051 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 1052 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1053 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1054 for(auto *D : FunctionUDRMap[CGF.CurFn]) { 1055 UDRMap.erase(D); 1056 } 1057 FunctionUDRMap.erase(CGF.CurFn); 1058 } 1059 } 1060 1061 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1062 if (!IdentTy) { 1063 } 1064 return llvm::PointerType::getUnqual(IdentTy); 1065 } 1066 1067 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1068 if (!Kmpc_MicroTy) { 1069 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1070 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1071 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1072 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1073 } 1074 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1075 } 1076 1077 llvm::Constant * 1078 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1079 llvm::Constant *RTLFn = nullptr; 1080 switch (static_cast<OpenMPRTLFunction>(Function)) { 1081 case OMPRTL__kmpc_fork_call: { 1082 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1083 // microtask, ...); 1084 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1085 getKmpc_MicroPointerTy()}; 1086 llvm::FunctionType *FnTy = 1087 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1088 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1089 break; 1090 } 1091 case OMPRTL__kmpc_global_thread_num: { 1092 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1093 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1094 llvm::FunctionType *FnTy = 1095 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1096 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1097 break; 1098 } 1099 case OMPRTL__kmpc_threadprivate_cached: { 1100 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1101 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1102 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1103 CGM.VoidPtrTy, CGM.SizeTy, 1104 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1105 llvm::FunctionType *FnTy = 1106 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1107 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1108 break; 1109 } 1110 case OMPRTL__kmpc_critical: { 1111 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1112 // kmp_critical_name *crit); 1113 llvm::Type *TypeParams[] = { 1114 getIdentTyPointerTy(), 
CGM.Int32Ty, 1115 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1116 llvm::FunctionType *FnTy = 1117 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1118 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1119 break; 1120 } 1121 case OMPRTL__kmpc_critical_with_hint: { 1122 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1123 // kmp_critical_name *crit, uintptr_t hint); 1124 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1125 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1126 CGM.IntPtrTy}; 1127 llvm::FunctionType *FnTy = 1128 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1129 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); 1130 break; 1131 } 1132 case OMPRTL__kmpc_threadprivate_register: { 1133 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1134 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1135 // typedef void *(*kmpc_ctor)(void *); 1136 auto KmpcCtorTy = 1137 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1138 /*isVarArg*/ false)->getPointerTo(); 1139 // typedef void *(*kmpc_cctor)(void *, void *); 1140 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1141 auto KmpcCopyCtorTy = 1142 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1143 /*isVarArg*/ false)->getPointerTo(); 1144 // typedef void (*kmpc_dtor)(void *); 1145 auto KmpcDtorTy = 1146 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1147 ->getPointerTo(); 1148 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1149 KmpcCopyCtorTy, KmpcDtorTy}; 1150 auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1151 /*isVarArg*/ false); 1152 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1153 break; 1154 } 1155 case OMPRTL__kmpc_end_critical: { 1156 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1157 // kmp_critical_name *crit); 
1158 llvm::Type *TypeParams[] = { 1159 getIdentTyPointerTy(), CGM.Int32Ty, 1160 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1161 llvm::FunctionType *FnTy = 1162 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1163 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1164 break; 1165 } 1166 case OMPRTL__kmpc_cancel_barrier: { 1167 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1168 // global_tid); 1169 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1170 llvm::FunctionType *FnTy = 1171 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1172 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1173 break; 1174 } 1175 case OMPRTL__kmpc_barrier: { 1176 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1177 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1178 llvm::FunctionType *FnTy = 1179 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1180 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1181 break; 1182 } 1183 case OMPRTL__kmpc_for_static_fini: { 1184 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1185 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1186 llvm::FunctionType *FnTy = 1187 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1188 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1189 break; 1190 } 1191 case OMPRTL__kmpc_push_num_threads: { 1192 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1193 // kmp_int32 num_threads) 1194 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1195 CGM.Int32Ty}; 1196 llvm::FunctionType *FnTy = 1197 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1198 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1199 break; 1200 } 1201 case OMPRTL__kmpc_serialized_parallel: { 1202 // Build void 
__kmpc_serialized_parallel(ident_t *loc, kmp_int32 1203 // global_tid); 1204 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1205 llvm::FunctionType *FnTy = 1206 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1207 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1208 break; 1209 } 1210 case OMPRTL__kmpc_end_serialized_parallel: { 1211 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1212 // global_tid); 1213 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1214 llvm::FunctionType *FnTy = 1215 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1216 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1217 break; 1218 } 1219 case OMPRTL__kmpc_flush: { 1220 // Build void __kmpc_flush(ident_t *loc); 1221 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1222 llvm::FunctionType *FnTy = 1223 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1224 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1225 break; 1226 } 1227 case OMPRTL__kmpc_master: { 1228 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1229 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1230 llvm::FunctionType *FnTy = 1231 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1232 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1233 break; 1234 } 1235 case OMPRTL__kmpc_end_master: { 1236 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1237 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1238 llvm::FunctionType *FnTy = 1239 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1240 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1241 break; 1242 } 1243 case OMPRTL__kmpc_omp_taskyield: { 1244 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1245 // int end_part); 1246 
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1247 llvm::FunctionType *FnTy = 1248 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1249 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1250 break; 1251 } 1252 case OMPRTL__kmpc_single: { 1253 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1254 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1255 llvm::FunctionType *FnTy = 1256 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1257 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1258 break; 1259 } 1260 case OMPRTL__kmpc_end_single: { 1261 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1262 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1263 llvm::FunctionType *FnTy = 1264 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1265 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1266 break; 1267 } 1268 case OMPRTL__kmpc_omp_task_alloc: { 1269 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1270 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1271 // kmp_routine_entry_t *task_entry); 1272 assert(KmpRoutineEntryPtrTy != nullptr && 1273 "Type kmp_routine_entry_t must be created."); 1274 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1275 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1276 // Return void * and then cast to particular kmp_task_t type. 
1277 llvm::FunctionType *FnTy = 1278 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1279 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1280 break; 1281 } 1282 case OMPRTL__kmpc_omp_task: { 1283 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1284 // *new_task); 1285 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1286 CGM.VoidPtrTy}; 1287 llvm::FunctionType *FnTy = 1288 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1289 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 1290 break; 1291 } 1292 case OMPRTL__kmpc_copyprivate: { 1293 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 1294 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 1295 // kmp_int32 didit); 1296 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1297 auto *CpyFnTy = 1298 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 1299 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 1300 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 1301 CGM.Int32Ty}; 1302 llvm::FunctionType *FnTy = 1303 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1304 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 1305 break; 1306 } 1307 case OMPRTL__kmpc_reduce: { 1308 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 1309 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 1310 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 1311 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1312 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1313 /*isVarArg=*/false); 1314 llvm::Type *TypeParams[] = { 1315 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1316 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1317 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1318 
llvm::FunctionType *FnTy = 1319 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1320 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 1321 break; 1322 } 1323 case OMPRTL__kmpc_reduce_nowait: { 1324 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 1325 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 1326 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 1327 // *lck); 1328 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1329 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1330 /*isVarArg=*/false); 1331 llvm::Type *TypeParams[] = { 1332 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1333 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1334 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1335 llvm::FunctionType *FnTy = 1336 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1337 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 1338 break; 1339 } 1340 case OMPRTL__kmpc_end_reduce: { 1341 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 1342 // kmp_critical_name *lck); 1343 llvm::Type *TypeParams[] = { 1344 getIdentTyPointerTy(), CGM.Int32Ty, 1345 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1346 llvm::FunctionType *FnTy = 1347 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1348 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 1349 break; 1350 } 1351 case OMPRTL__kmpc_end_reduce_nowait: { 1352 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 1353 // kmp_critical_name *lck); 1354 llvm::Type *TypeParams[] = { 1355 getIdentTyPointerTy(), CGM.Int32Ty, 1356 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1357 llvm::FunctionType *FnTy = 1358 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1359 RTLFn = 1360 CGM.CreateRuntimeFunction(FnTy, 
/*Name=*/"__kmpc_end_reduce_nowait"); 1361 break; 1362 } 1363 case OMPRTL__kmpc_omp_task_begin_if0: { 1364 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1365 // *new_task); 1366 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1367 CGM.VoidPtrTy}; 1368 llvm::FunctionType *FnTy = 1369 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1370 RTLFn = 1371 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 1372 break; 1373 } 1374 case OMPRTL__kmpc_omp_task_complete_if0: { 1375 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1376 // *new_task); 1377 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1378 CGM.VoidPtrTy}; 1379 llvm::FunctionType *FnTy = 1380 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1381 RTLFn = CGM.CreateRuntimeFunction(FnTy, 1382 /*Name=*/"__kmpc_omp_task_complete_if0"); 1383 break; 1384 } 1385 case OMPRTL__kmpc_ordered: { 1386 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 1387 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1388 llvm::FunctionType *FnTy = 1389 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1390 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 1391 break; 1392 } 1393 case OMPRTL__kmpc_end_ordered: { 1394 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 1395 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1396 llvm::FunctionType *FnTy = 1397 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1398 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 1399 break; 1400 } 1401 case OMPRTL__kmpc_omp_taskwait: { 1402 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 1403 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1404 llvm::FunctionType *FnTy = 1405 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1406 RTLFn = 
CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 1407 break; 1408 } 1409 case OMPRTL__kmpc_taskgroup: { 1410 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 1411 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1412 llvm::FunctionType *FnTy = 1413 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1414 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 1415 break; 1416 } 1417 case OMPRTL__kmpc_end_taskgroup: { 1418 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 1419 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1420 llvm::FunctionType *FnTy = 1421 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1422 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 1423 break; 1424 } 1425 case OMPRTL__kmpc_push_proc_bind: { 1426 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 1427 // int proc_bind) 1428 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1429 llvm::FunctionType *FnTy = 1430 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1431 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 1432 break; 1433 } 1434 case OMPRTL__kmpc_omp_task_with_deps: { 1435 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 1436 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 1437 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 1438 llvm::Type *TypeParams[] = { 1439 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 1440 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 1441 llvm::FunctionType *FnTy = 1442 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1443 RTLFn = 1444 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 1445 break; 1446 } 1447 case OMPRTL__kmpc_omp_wait_deps: { 1448 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 1449 // 
kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 1450 // kmp_depend_info_t *noalias_dep_list); 1451 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1452 CGM.Int32Ty, CGM.VoidPtrTy, 1453 CGM.Int32Ty, CGM.VoidPtrTy}; 1454 llvm::FunctionType *FnTy = 1455 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1456 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 1457 break; 1458 } 1459 case OMPRTL__kmpc_cancellationpoint: { 1460 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 1461 // global_tid, kmp_int32 cncl_kind) 1462 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1463 llvm::FunctionType *FnTy = 1464 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1465 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 1466 break; 1467 } 1468 case OMPRTL__kmpc_cancel: { 1469 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 1470 // kmp_int32 cncl_kind) 1471 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1472 llvm::FunctionType *FnTy = 1473 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1474 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 1475 break; 1476 } 1477 case OMPRTL__kmpc_push_num_teams: { 1478 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 1479 // kmp_int32 num_teams, kmp_int32 num_threads) 1480 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1481 CGM.Int32Ty}; 1482 llvm::FunctionType *FnTy = 1483 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1484 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 1485 break; 1486 } 1487 case OMPRTL__kmpc_fork_teams: { 1488 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 1489 // microtask, ...); 1490 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1491 
getKmpc_MicroPointerTy()}; 1492 llvm::FunctionType *FnTy = 1493 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1494 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 1495 break; 1496 } 1497 case OMPRTL__kmpc_taskloop: { 1498 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 1499 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 1500 // sched, kmp_uint64 grainsize, void *task_dup); 1501 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 1502 CGM.IntTy, 1503 CGM.VoidPtrTy, 1504 CGM.IntTy, 1505 CGM.Int64Ty->getPointerTo(), 1506 CGM.Int64Ty->getPointerTo(), 1507 CGM.Int64Ty, 1508 CGM.IntTy, 1509 CGM.IntTy, 1510 CGM.Int64Ty, 1511 CGM.VoidPtrTy}; 1512 llvm::FunctionType *FnTy = 1513 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1514 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 1515 break; 1516 } 1517 case OMPRTL__kmpc_doacross_init: { 1518 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 1519 // num_dims, struct kmp_dim *dims); 1520 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 1521 CGM.Int32Ty, 1522 CGM.Int32Ty, 1523 CGM.VoidPtrTy}; 1524 llvm::FunctionType *FnTy = 1525 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1526 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 1527 break; 1528 } 1529 case OMPRTL__kmpc_doacross_fini: { 1530 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 1531 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1532 llvm::FunctionType *FnTy = 1533 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1534 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 1535 break; 1536 } 1537 case OMPRTL__kmpc_doacross_post: { 1538 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 1539 // *vec); 1540 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 
1541 CGM.Int64Ty->getPointerTo()}; 1542 llvm::FunctionType *FnTy = 1543 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1544 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 1545 break; 1546 } 1547 case OMPRTL__kmpc_doacross_wait: { 1548 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 1549 // *vec); 1550 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1551 CGM.Int64Ty->getPointerTo()}; 1552 llvm::FunctionType *FnTy = 1553 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1554 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 1555 break; 1556 } 1557 case OMPRTL__tgt_target: { 1558 // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t 1559 // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t 1560 // *arg_types); 1561 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1562 CGM.VoidPtrTy, 1563 CGM.Int32Ty, 1564 CGM.VoidPtrPtrTy, 1565 CGM.VoidPtrPtrTy, 1566 CGM.SizeTy->getPointerTo(), 1567 CGM.Int32Ty->getPointerTo()}; 1568 llvm::FunctionType *FnTy = 1569 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1570 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 1571 break; 1572 } 1573 case OMPRTL__tgt_target_teams: { 1574 // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, 1575 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 1576 // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); 1577 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1578 CGM.VoidPtrTy, 1579 CGM.Int32Ty, 1580 CGM.VoidPtrPtrTy, 1581 CGM.VoidPtrPtrTy, 1582 CGM.SizeTy->getPointerTo(), 1583 CGM.Int32Ty->getPointerTo(), 1584 CGM.Int32Ty, 1585 CGM.Int32Ty}; 1586 llvm::FunctionType *FnTy = 1587 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1588 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 1589 break; 1590 } 1591 case OMPRTL__tgt_register_lib: { 
1592 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 1593 QualType ParamTy = 1594 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 1595 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 1596 llvm::FunctionType *FnTy = 1597 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1598 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 1599 break; 1600 } 1601 case OMPRTL__tgt_unregister_lib: { 1602 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 1603 QualType ParamTy = 1604 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 1605 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 1606 llvm::FunctionType *FnTy = 1607 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1608 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 1609 break; 1610 } 1611 case OMPRTL__tgt_target_data_begin: { 1612 // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, 1613 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 1614 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1615 CGM.Int32Ty, 1616 CGM.VoidPtrPtrTy, 1617 CGM.VoidPtrPtrTy, 1618 CGM.SizeTy->getPointerTo(), 1619 CGM.Int32Ty->getPointerTo()}; 1620 llvm::FunctionType *FnTy = 1621 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1622 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 1623 break; 1624 } 1625 case OMPRTL__tgt_target_data_end: { 1626 // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num, 1627 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 1628 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1629 CGM.Int32Ty, 1630 CGM.VoidPtrPtrTy, 1631 CGM.VoidPtrPtrTy, 1632 CGM.SizeTy->getPointerTo(), 1633 CGM.Int32Ty->getPointerTo()}; 1634 llvm::FunctionType *FnTy = 1635 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1636 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__tgt_target_data_end"); 1637 break; 1638 } 1639 case OMPRTL__tgt_target_data_update: { 1640 // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num, 1641 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 1642 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1643 CGM.Int32Ty, 1644 CGM.VoidPtrPtrTy, 1645 CGM.VoidPtrPtrTy, 1646 CGM.SizeTy->getPointerTo(), 1647 CGM.Int32Ty->getPointerTo()}; 1648 llvm::FunctionType *FnTy = 1649 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1650 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 1651 break; 1652 } 1653 } 1654 assert(RTLFn && "Unable to find OpenMP runtime function"); 1655 return RTLFn; 1656 } 1657 1658 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, 1659 bool IVSigned) { 1660 assert((IVSize == 32 || IVSize == 64) && 1661 "IV size is not compatible with the omp runtime"); 1662 auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1663 : "__kmpc_for_static_init_4u") 1664 : (IVSigned ? "__kmpc_for_static_init_8" 1665 : "__kmpc_for_static_init_8u"); 1666 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1667 auto PtrTy = llvm::PointerType::getUnqual(ITy); 1668 llvm::Type *TypeParams[] = { 1669 getIdentTyPointerTy(), // loc 1670 CGM.Int32Ty, // tid 1671 CGM.Int32Ty, // schedtype 1672 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1673 PtrTy, // p_lower 1674 PtrTy, // p_upper 1675 PtrTy, // p_stride 1676 ITy, // incr 1677 ITy // chunk 1678 }; 1679 llvm::FunctionType *FnTy = 1680 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1681 return CGM.CreateRuntimeFunction(FnTy, Name); 1682 } 1683 1684 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, 1685 bool IVSigned) { 1686 assert((IVSize == 32 || IVSize == 64) && 1687 "IV size is not compatible with the omp runtime"); 1688 auto Name = 1689 IVSize == 32 1690 ? (IVSigned ? 
"__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1691 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1692 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1693 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1694 CGM.Int32Ty, // tid 1695 CGM.Int32Ty, // schedtype 1696 ITy, // lower 1697 ITy, // upper 1698 ITy, // stride 1699 ITy // chunk 1700 }; 1701 llvm::FunctionType *FnTy = 1702 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1703 return CGM.CreateRuntimeFunction(FnTy, Name); 1704 } 1705 1706 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, 1707 bool IVSigned) { 1708 assert((IVSize == 32 || IVSize == 64) && 1709 "IV size is not compatible with the omp runtime"); 1710 auto Name = 1711 IVSize == 32 1712 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1713 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1714 llvm::Type *TypeParams[] = { 1715 getIdentTyPointerTy(), // loc 1716 CGM.Int32Ty, // tid 1717 }; 1718 llvm::FunctionType *FnTy = 1719 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1720 return CGM.CreateRuntimeFunction(FnTy, Name); 1721 } 1722 1723 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, 1724 bool IVSigned) { 1725 assert((IVSize == 32 || IVSize == 64) && 1726 "IV size is not compatible with the omp runtime"); 1727 auto Name = 1728 IVSize == 32 1729 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1730 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1731 auto ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1732 auto PtrTy = llvm::PointerType::getUnqual(ITy); 1733 llvm::Type *TypeParams[] = { 1734 getIdentTyPointerTy(), // loc 1735 CGM.Int32Ty, // tid 1736 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1737 PtrTy, // p_lower 1738 PtrTy, // p_upper 1739 PtrTy // p_stride 1740 }; 1741 llvm::FunctionType *FnTy = 1742 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1743 return CGM.CreateRuntimeFunction(FnTy, Name); 1744 } 1745 1746 llvm::Constant * 1747 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1748 assert(!CGM.getLangOpts().OpenMPUseTLS || 1749 !CGM.getContext().getTargetInfo().isTLSSupported()); 1750 // Lookup the entry, lazily creating it if necessary. 1751 return getOrCreateInternalVariable(CGM.Int8PtrPtrTy, 1752 Twine(CGM.getMangledName(VD)) + ".cache."); 1753 } 1754 1755 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1756 const VarDecl *VD, 1757 Address VDAddr, 1758 SourceLocation Loc) { 1759 if (CGM.getLangOpts().OpenMPUseTLS && 1760 CGM.getContext().getTargetInfo().isTLSSupported()) 1761 return VDAddr; 1762 1763 auto VarTy = VDAddr.getElementType(); 1764 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1765 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1766 CGM.Int8PtrTy), 1767 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1768 getOrCreateThreadPrivateCache(VD)}; 1769 return Address(CGF.EmitRuntimeCall( 1770 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 1771 VDAddr.getAlignment()); 1772 } 1773 1774 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1775 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1776 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1777 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1778 // library. 
auto OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {OMPLoc,
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.VoidPtrTy),
                         Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}

/// Generates the helper functions that construct and destroy the per-thread
/// copies of the threadprivate variable \a VD and registers them with the
/// OpenMP runtime through emitThreadPrivateVarInit.
///
/// Nothing is emitted and nullptr is returned when OpenMPUseTLS is enabled and
/// the target supports TLS, when \a VD has no definition, when \a VD was
/// already handled (tracked in ThreadPrivateWithDefinition), or when neither a
/// constructor nor a destructor helper is needed.
///
/// \param VD Threadprivate variable; it is re-pointed to its definition.
/// \param VDAddr Address of the original variable. Its alignment is reused for
/// the addresses formed inside the generated helpers.
/// \param Loc Location used for the generated functions and runtime calls.
/// \param PerformInit When true (and compiling C++), a ".__kmpc_global_ctor_."
/// helper that re-emits the variable's initializer is generated.
/// \param CGF When null, the registration call is emitted into a newly created
/// ".__omp_threadprivate_init_." function; otherwise it is emitted through
/// \a CGF.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With native TLS the runtime-based registration is not used at all.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Handle each defined variable only once.
  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
    ThreadPrivateWithDefinition.insert(VD);
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    auto Init = VD->getAnyInitializer();
    // NOTE(review): Init is dereferenced below without a null check;
    // presumably PerformInit is only set when an initializer exists - confirm
    // against the callers.
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
      Args.push_back(&Dst);

      // Helper signature: void *ctor(void *dst).
      auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_ctor_.", FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, SourceLocation());
      // Load the destination pointer and view it as a pointer to VD's type.
      auto ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
                                             CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the destination pointer and store it as the helper's result.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
      Args.push_back(&Dst);

      // Helper signature: void dtor(void *dst).
      auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_dtor_.", FI, Loc);
      // Named object: the empty debug location stays in effect until NL goes
      // out of scope at the end of this block.
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            SourceLocation());
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      auto ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto CopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                /*isVarArg=*/false)->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // A helper that was not generated is passed as a typed null pointer.
    if (Ctor == nullptr) {
      auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function to emit into: create a dedicated nullary void function
      // that performs the registration.
      auto InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, ".__omp_threadprivate_init_.",
          CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
1900 return InitFunction; 1901 } 1902 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1903 } 1904 return nullptr; 1905 } 1906 1907 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen 1908 /// function. Here is the logic: 1909 /// if (Cond) { 1910 /// ThenGen(); 1911 /// } else { 1912 /// ElseGen(); 1913 /// } 1914 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 1915 const RegionCodeGenTy &ThenGen, 1916 const RegionCodeGenTy &ElseGen) { 1917 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 1918 1919 // If the condition constant folds and can be elided, try to avoid emitting 1920 // the condition and the dead arm of the if/else. 1921 bool CondConstant; 1922 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 1923 if (CondConstant) 1924 ThenGen(CGF); 1925 else 1926 ElseGen(CGF); 1927 return; 1928 } 1929 1930 // Otherwise, the condition did not fold, or we couldn't elide it. Just 1931 // emit the conditional branch. 1932 auto ThenBlock = CGF.createBasicBlock("omp_if.then"); 1933 auto ElseBlock = CGF.createBasicBlock("omp_if.else"); 1934 auto ContBlock = CGF.createBasicBlock("omp_if.end"); 1935 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 1936 1937 // Emit the 'then' code. 1938 CGF.EmitBlock(ThenBlock); 1939 ThenGen(CGF); 1940 CGF.EmitBranch(ContBlock); 1941 // Emit the 'else' code if present. 1942 // There is no need to emit line number for unconditional branch. 1943 (void)ApplyDebugLocation::CreateEmpty(CGF); 1944 CGF.EmitBlock(ElseBlock); 1945 ElseGen(CGF); 1946 // There is no need to emit line number for unconditional branch. 1947 (void)ApplyDebugLocation::CreateEmpty(CGF); 1948 CGF.EmitBranch(ContBlock); 1949 // Emit the continuation block for code after the if. 
CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// Emits the code that executes \a OutlinedFn as an OpenMP parallel region.
///
/// Without an 'if' clause (\a IfCond is null) a __kmpc_fork_call is emitted.
/// With \a IfCond, emitOMPIfClause chooses between that fork call and a
/// serialized path: __kmpc_serialized_parallel, a direct call of
/// \a OutlinedFn, then __kmpc_end_serialized_parallel.
///
/// \param CGF Function being emitted; nothing is done without an insert point.
/// \param Loc Source location passed to the runtime.
/// \param OutlinedFn Outlined function implementing the region body.
/// \param CapturedVars Values forwarded as trailing arguments to
/// \a OutlinedFn.
/// \param IfCond Condition of the 'if' clause, or null.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Value *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  // The location is emitted once here and captured by both generators.
  auto *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    auto &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    // Fixed arguments first, then the captured variables as varargs.
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    auto &RT = CGF.CGM.getOpenMPRuntime();
    auto ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&global_tid, &zero, captured_vars...);
    auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    // Second argument: address of a temporary i32 initialized to zero.
    Address ZeroAddr =
        CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
                             /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond)
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  else {
    // No 'if' clause: always take the parallel (fork) path.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary
// and return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();

  // No region-provided thread-ID variable: store the thread ID into a fresh
  // signed 32-bit temporary and return that temporary's address.
  auto ThreadID = getThreadID(CGF, Loc);
  auto Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

/// Returns the variable registered in InternalVars under \a Name, creating it
/// with type \a Ty on first request. A later request for the same name must
/// ask for the same type (checked by an assertion).
llvm::Constant *
CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
                                             const llvm::Twine &Name) {
  // Render the Twine into a buffer to obtain the lookup key.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  auto RuntimeName = Out.str();
  // insert() yields the existing entry if the name is already known, or a new
  // entry whose value is null.
  auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
  if (Elem.second) {
assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  // First request for this name: create a zero-initialized global with common
  // linkage, named after the map key, and remember it in the map entry.
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first());
}

/// Returns the lock variable used for the critical region \a CriticalName. The
/// variable is an internal variable of type KmpCriticalNameTy named
/// ".gomp_critical_user_<CriticalName>.var".
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  llvm::Twine Name(".gomp_critical_user_", CriticalName);
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Enter() emits a call to the "enter" runtime function and Exit() emits a
/// call to the "exit" runtime function. When the action is conditional, the
/// code following Enter() is only executed if the enter call returned a
/// non-null value; Done() must then be called to close that conditional part.
///
/// The argument lists are kept as ArrayRef, i.e. they are not copied: the
/// arrays passed to the constructor must outlive this object.
class CommonActionTy final : public PrePostActionTy {
  llvm::Value *EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::Value *ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  // Continuation block of the conditional part; only set by Enter() when
  // Conditional is true.
  llvm::BasicBlock *ContBlock = nullptr;

public:
  /// \param EnterCallee Runtime function called by Enter().
  /// \param EnterArgs Arguments for \a EnterCallee.
  /// \param ExitCallee Runtime function called by Exit().
  /// \param ExitArgs Arguments for \a ExitCallee.
  /// \param Conditional If true, guard the region on the result of the enter
  /// call.
  CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
                 llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
                 bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  /// Emits the enter call and, for conditional actions, the branch that skips
  /// to "omp_if.end" when the call returned null.
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  /// Closes the conditional part opened by Enter(). It uses ContBlock
  /// unconditionally, so it is only meaningful for conditional actions after
  /// Enter() has run; the callers visible here (master, single) respect that.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  /// Emits the exit call.
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

/// Emits a 'critical' region: the code produced by \a CriticalOpGen is
/// bracketed by __kmpc_critical (or __kmpc_critical_with_hint when \a Hint is
/// given) and __kmpc_end_critical, all using the lock that belongs to
/// \a CriticalName.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  // The enter call takes the same arguments as the exit call plus, when
  // present, the hint converted (unsigned) to IntPtrTy.
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
  }
  CommonActionTy Action(
      createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
                                 : OMPRTL__kmpc_critical),
      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// Emits a 'master' region: the code produced by \a MasterOpGen is executed
/// only if __kmpc_master returns non-zero and is followed by
/// __kmpc_end_master.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the conditional block opened by the action's Enter().
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
2144 if (!CGF.HaveInsertPoint()) 2145 return; 2146 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2147 llvm::Value *Args[] = { 2148 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2149 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2150 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 2151 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2152 Region->emitUntiedSwitch(CGF); 2153 } 2154 2155 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2156 const RegionCodeGenTy &TaskgroupOpGen, 2157 SourceLocation Loc) { 2158 if (!CGF.HaveInsertPoint()) 2159 return; 2160 // __kmpc_taskgroup(ident_t *, gtid); 2161 // TaskgroupOpGen(); 2162 // __kmpc_end_taskgroup(ident_t *, gtid); 2163 // Prepare arguments and build a call to __kmpc_taskgroup 2164 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2165 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, 2166 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 2167 Args); 2168 TaskgroupOpGen.setAction(Action); 2169 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2170 } 2171 2172 /// Given an array of pointers to variables, project the address of a 2173 /// given variable. 2174 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2175 unsigned Index, const VarDecl *Var) { 2176 // Pull out the pointer to the variable. 
2177 Address PtrAddr = 2178 CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize()); 2179 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2180 2181 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 2182 Addr = CGF.Builder.CreateElementBitCast( 2183 Addr, CGF.ConvertTypeForMem(Var->getType())); 2184 return Addr; 2185 } 2186 2187 static llvm::Value *emitCopyprivateCopyFunction( 2188 CodeGenModule &CGM, llvm::Type *ArgsType, 2189 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2190 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) { 2191 auto &C = CGM.getContext(); 2192 // void copy_func(void *LHSArg, void *RHSArg); 2193 FunctionArgList Args; 2194 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 2195 C.VoidPtrTy); 2196 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 2197 C.VoidPtrTy); 2198 Args.push_back(&LHSArg); 2199 Args.push_back(&RHSArg); 2200 auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2201 auto *Fn = llvm::Function::Create( 2202 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 2203 ".omp.copyprivate.copy_func", &CGM.getModule()); 2204 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); 2205 CodeGenFunction CGF(CGM); 2206 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 2207 // Dest = (void*[n])(LHSArg); 2208 // Src = (void*[n])(RHSArg); 2209 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2210 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2211 ArgsType), CGF.getPointerAlign()); 2212 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2213 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2214 ArgsType), CGF.getPointerAlign()); 2215 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 2216 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2217 // ... 
// *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    // The I-th destination and source addresses are read from the I-th slots
    // of the two pointer arrays passed to the copy function.
    auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // The copy is performed with the type of the original copyprivate
    // variable and its assignment operation.
    auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emits a 'single' region, optionally followed by the broadcast required for
/// 'copyprivate' variables.
///
/// The code produced by \a SingleOpGen runs only if __kmpc_single returns
/// non-zero and is followed by __kmpc_end_single. When \a CopyprivateVars is
/// not empty, a "did_it" flag records whether this thread executed the region,
/// and a call to __kmpc_copyprivate is emitted with a list of the variables'
/// addresses and a generated copy function.
///
/// \param CGF Function being emitted; nothing is done without an insert point.
/// \param SingleOpGen Generator for the region body.
/// \param Loc Source location passed to the runtime.
/// \param CopyprivateVars Variables named in 'copyprivate' clauses.
/// \param SrcExprs, DstExprs, AssignmentOps Per-variable helper expressions;
/// all four lists must have the same length.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  auto &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid when there is nothing to broadcast; its validity is
  // used below as the "copyprivate present" test.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Emitted before Action.Done(), i.e. still inside the conditional part
    // that only the thread executing the region reaches.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    auto CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      // Slot I holds the address of the I-th copyprivate variable as void *.
      Address Elem = CGF.Builder.CreateConstArrayGEP(
          CopyprivateList, I, CGF.getPointerSize());
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): emitCopyprivateCopyFunction declares its parameters as
    // (..., DestExprs, SrcExprs, ...), while SrcExprs is passed before
    // DstExprs here. Presumably this is compensated by how callers order the
    // arguments of emitSingleRegion - confirm before changing either side.
    auto *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
    auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

/// Emits an 'ordered' region. When \a IsThreads is true, the code produced by
/// \a OrderedOpGen is bracketed by __kmpc_ordered / __kmpc_end_ordered;
/// otherwise the region body is emitted inline without any runtime calls.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
                          createRuntimeFunction(OMPRTL__kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// Emits a barrier.
///
/// The location flags passed to the runtime depend on \a Kind ('for',
/// 'sections', 'single', an explicit 'barrier', or any other directive). If
/// the enclosing OpenMP region has a cancel construct and \a ForceSimpleCall
/// is false, __kmpc_cancel_barrier is called instead of __kmpc_barrier; with
/// \a EmitChecks set, a non-zero result of that call branches to the region's
/// cancellation destination.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags =
OMP_IDENT_BARRIER_IMPL_FOR; 2346 else if (Kind == OMPD_sections) 2347 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2348 else if (Kind == OMPD_single) 2349 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2350 else if (Kind == OMPD_barrier) 2351 Flags = OMP_IDENT_BARRIER_EXPL; 2352 else 2353 Flags = OMP_IDENT_BARRIER_IMPL; 2354 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2355 // thread_id); 2356 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2357 getThreadID(CGF, Loc)}; 2358 if (auto *OMPRegionInfo = 2359 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 2360 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2361 auto *Result = CGF.EmitRuntimeCall( 2362 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 2363 if (EmitChecks) { 2364 // if (__kmpc_cancel_barrier()) { 2365 // exit from construct; 2366 // } 2367 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2368 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 2369 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 2370 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2371 CGF.EmitBlock(ExitBB); 2372 // exit from construct; 2373 auto CancelDestination = 2374 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2375 CGF.EmitBranchThroughCleanup(CancelDestination); 2376 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2377 } 2378 return; 2379 } 2380 } 2381 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 2382 } 2383 2384 /// \brief Map the OpenMP loop schedule to the runtime enumeration. 2385 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2386 bool Chunked, bool Ordered) { 2387 switch (ScheduleKind) { 2388 case OMPC_SCHEDULE_static: 2389 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2390 : (Ordered ? OMP_ord_static : OMP_sch_static); 2391 case OMPC_SCHEDULE_dynamic: 2392 return Ordered ? 
OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2393 case OMPC_SCHEDULE_guided: 2394 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2395 case OMPC_SCHEDULE_runtime: 2396 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2397 case OMPC_SCHEDULE_auto: 2398 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2399 case OMPC_SCHEDULE_unknown: 2400 assert(!Chunked && "chunk was specified but schedule kind not known"); 2401 return Ordered ? OMP_ord_static : OMP_sch_static; 2402 } 2403 llvm_unreachable("Unexpected runtime schedule"); 2404 } 2405 2406 /// \brief Map the OpenMP distribute schedule to the runtime enumeration. 2407 static OpenMPSchedType 2408 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2409 // only static is allowed for dist_schedule 2410 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2411 } 2412 2413 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2414 bool Chunked) const { 2415 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2416 return Schedule == OMP_sch_static; 2417 } 2418 2419 bool CGOpenMPRuntime::isStaticNonchunked( 2420 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2421 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2422 return Schedule == OMP_dist_sch_static; 2423 } 2424 2425 2426 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2427 auto Schedule = 2428 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2429 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2430 return Schedule != OMP_sch_static; 2431 } 2432 2433 static int addMonoNonMonoModifier(OpenMPSchedType Schedule, 2434 OpenMPScheduleClauseModifier M1, 2435 OpenMPScheduleClauseModifier M2) { 2436 int Modifier = 0; 2437 switch (M1) { 2438 case OMPC_SCHEDULE_MODIFIER_monotonic: 2439 Modifier = OMP_sch_modifier_monotonic; 2440 break; 2441 case 
OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2442 Modifier = OMP_sch_modifier_nonmonotonic; 2443 break; 2444 case OMPC_SCHEDULE_MODIFIER_simd: 2445 if (Schedule == OMP_sch_static_chunked) 2446 Schedule = OMP_sch_static_balanced_chunked; 2447 break; 2448 case OMPC_SCHEDULE_MODIFIER_last: 2449 case OMPC_SCHEDULE_MODIFIER_unknown: 2450 break; 2451 } 2452 switch (M2) { 2453 case OMPC_SCHEDULE_MODIFIER_monotonic: 2454 Modifier = OMP_sch_modifier_monotonic; 2455 break; 2456 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2457 Modifier = OMP_sch_modifier_nonmonotonic; 2458 break; 2459 case OMPC_SCHEDULE_MODIFIER_simd: 2460 if (Schedule == OMP_sch_static_chunked) 2461 Schedule = OMP_sch_static_balanced_chunked; 2462 break; 2463 case OMPC_SCHEDULE_MODIFIER_last: 2464 case OMPC_SCHEDULE_MODIFIER_unknown: 2465 break; 2466 } 2467 return Schedule | Modifier; 2468 } 2469 2470 void CGOpenMPRuntime::emitForDispatchInit( 2471 CodeGenFunction &CGF, SourceLocation Loc, 2472 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2473 bool Ordered, const DispatchRTInput &DispatchValues) { 2474 if (!CGF.HaveInsertPoint()) 2475 return; 2476 OpenMPSchedType Schedule = getRuntimeSchedule( 2477 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2478 assert(Ordered || 2479 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2480 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2481 Schedule != OMP_sch_static_balanced_chunked)); 2482 // Call __kmpc_dispatch_init( 2483 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2484 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2485 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2486 2487 // If the Chunk was not specified in the clause - use default value 1. 2488 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 2489 : CGF.Builder.getIntN(IVSize, 1); 2490 llvm::Value *Args[] = { 2491 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2492 CGF.Builder.getInt32(addMonoNonMonoModifier( 2493 Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2494 DispatchValues.LB, // Lower 2495 DispatchValues.UB, // Upper 2496 CGF.Builder.getIntN(IVSize, 1), // Stride 2497 Chunk // Chunk 2498 }; 2499 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2500 } 2501 2502 static void emitForStaticInitCall( 2503 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2504 llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule, 2505 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2506 unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB, 2507 Address ST, llvm::Value *Chunk) { 2508 if (!CGF.HaveInsertPoint()) 2509 return; 2510 2511 assert(!Ordered); 2512 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2513 Schedule == OMP_sch_static_balanced_chunked || 2514 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2515 Schedule == OMP_dist_sch_static || 2516 Schedule == OMP_dist_sch_static_chunked); 2517 2518 // Call __kmpc_for_static_init( 2519 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2520 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2521 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2522 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2523 if (Chunk == nullptr) { 2524 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2525 Schedule == OMP_dist_sch_static) && 2526 "expected static non-chunked schedule"); 2527 // If the Chunk was not specified in the clause - use default value 1. 
2528 Chunk = CGF.Builder.getIntN(IVSize, 1); 2529 } else { 2530 assert((Schedule == OMP_sch_static_chunked || 2531 Schedule == OMP_sch_static_balanced_chunked || 2532 Schedule == OMP_ord_static_chunked || 2533 Schedule == OMP_dist_sch_static_chunked) && 2534 "expected static chunked schedule"); 2535 } 2536 llvm::Value *Args[] = { 2537 UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier( 2538 Schedule, M1, M2)), // Schedule type 2539 IL.getPointer(), // &isLastIter 2540 LB.getPointer(), // &LB 2541 UB.getPointer(), // &UB 2542 ST.getPointer(), // &Stride 2543 CGF.Builder.getIntN(IVSize, 1), // Incr 2544 Chunk // Chunk 2545 }; 2546 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2547 } 2548 2549 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2550 SourceLocation Loc, 2551 const OpenMPScheduleTy &ScheduleKind, 2552 unsigned IVSize, bool IVSigned, 2553 bool Ordered, Address IL, Address LB, 2554 Address UB, Address ST, 2555 llvm::Value *Chunk) { 2556 OpenMPSchedType ScheduleNum = 2557 getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); 2558 auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); 2559 auto *ThreadId = getThreadID(CGF, Loc); 2560 auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); 2561 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2562 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize, 2563 Ordered, IL, LB, UB, ST, Chunk); 2564 } 2565 2566 void CGOpenMPRuntime::emitDistributeStaticInit( 2567 CodeGenFunction &CGF, SourceLocation Loc, 2568 OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned, 2569 bool Ordered, Address IL, Address LB, Address UB, Address ST, 2570 llvm::Value *Chunk) { 2571 OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr); 2572 auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); 2573 auto *ThreadId = getThreadID(CGF, Loc); 2574 auto *StaticInitFunction = createForStaticInitFunction(IVSize, 
IVSigned); 2575 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2576 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2577 OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB, 2578 UB, ST, Chunk); 2579 } 2580 2581 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2582 SourceLocation Loc) { 2583 if (!CGF.HaveInsertPoint()) 2584 return; 2585 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2586 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2587 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 2588 Args); 2589 } 2590 2591 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2592 SourceLocation Loc, 2593 unsigned IVSize, 2594 bool IVSigned) { 2595 if (!CGF.HaveInsertPoint()) 2596 return; 2597 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2598 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2599 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2600 } 2601 2602 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2603 SourceLocation Loc, unsigned IVSize, 2604 bool IVSigned, Address IL, 2605 Address LB, Address UB, 2606 Address ST) { 2607 // Call __kmpc_dispatch_next( 2608 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2609 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2610 // kmp_int[32|64] *p_stride); 2611 llvm::Value *Args[] = { 2612 emitUpdateLocation(CGF, Loc), 2613 getThreadID(CGF, Loc), 2614 IL.getPointer(), // &isLastIter 2615 LB.getPointer(), // &Lower 2616 UB.getPointer(), // &Upper 2617 ST.getPointer() // &Stride 2618 }; 2619 llvm::Value *Call = 2620 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2621 return CGF.EmitScalarConversion( 2622 Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true), 2623 CGF.getContext().BoolTy, Loc); 2624 } 2625 2626 void 
CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2627 llvm::Value *NumThreads, 2628 SourceLocation Loc) { 2629 if (!CGF.HaveInsertPoint()) 2630 return; 2631 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2632 llvm::Value *Args[] = { 2633 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2634 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 2635 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 2636 Args); 2637 } 2638 2639 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2640 OpenMPProcBindClauseKind ProcBind, 2641 SourceLocation Loc) { 2642 if (!CGF.HaveInsertPoint()) 2643 return; 2644 // Constants for proc bind value accepted by the runtime. 2645 enum ProcBindTy { 2646 ProcBindFalse = 0, 2647 ProcBindTrue, 2648 ProcBindMaster, 2649 ProcBindClose, 2650 ProcBindSpread, 2651 ProcBindIntel, 2652 ProcBindDefault 2653 } RuntimeProcBind; 2654 switch (ProcBind) { 2655 case OMPC_PROC_BIND_master: 2656 RuntimeProcBind = ProcBindMaster; 2657 break; 2658 case OMPC_PROC_BIND_close: 2659 RuntimeProcBind = ProcBindClose; 2660 break; 2661 case OMPC_PROC_BIND_spread: 2662 RuntimeProcBind = ProcBindSpread; 2663 break; 2664 case OMPC_PROC_BIND_unknown: 2665 llvm_unreachable("Unsupported proc_bind value."); 2666 } 2667 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2668 llvm::Value *Args[] = { 2669 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2670 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 2671 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 2672 } 2673 2674 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2675 SourceLocation Loc) { 2676 if (!CGF.HaveInsertPoint()) 2677 return; 2678 // Build call void __kmpc_flush(ident_t *loc) 2679 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 2680 emitUpdateLocation(CGF, Loc)); 2681 } 2682 2683 namespace { 2684 /// \brief 
Indexes of fields for type kmp_task_t. 2685 enum KmpTaskTFields { 2686 /// \brief List of shared variables. 2687 KmpTaskTShareds, 2688 /// \brief Task routine. 2689 KmpTaskTRoutine, 2690 /// \brief Partition id for the untied tasks. 2691 KmpTaskTPartId, 2692 /// Function with call of destructors for private variables. 2693 Data1, 2694 /// Task priority. 2695 Data2, 2696 /// (Taskloops only) Lower bound. 2697 KmpTaskTLowerBound, 2698 /// (Taskloops only) Upper bound. 2699 KmpTaskTUpperBound, 2700 /// (Taskloops only) Stride. 2701 KmpTaskTStride, 2702 /// (Taskloops only) Is last iteration flag. 2703 KmpTaskTLastIter, 2704 }; 2705 } // anonymous namespace 2706 2707 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2708 // FIXME: Add other entries type when they become supported. 2709 return OffloadEntriesTargetRegion.empty(); 2710 } 2711 2712 /// \brief Initialize target region entry. 2713 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2714 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2715 StringRef ParentName, unsigned LineNum, 2716 unsigned Order) { 2717 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2718 "only required for the device " 2719 "code generation."); 2720 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2721 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 2722 /*Flags=*/0); 2723 ++OffloadingEntriesNum; 2724 } 2725 2726 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2727 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2728 StringRef ParentName, unsigned LineNum, 2729 llvm::Constant *Addr, llvm::Constant *ID, 2730 int32_t Flags) { 2731 // If we are emitting code for a target, the entry is already initialized, 2732 // only has to be registered. 
2733 if (CGM.getLangOpts().OpenMPIsDevice) { 2734 assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 2735 "Entry must exist."); 2736 auto &Entry = 2737 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 2738 assert(Entry.isValid() && "Entry not initialized!"); 2739 Entry.setAddress(Addr); 2740 Entry.setID(ID); 2741 Entry.setFlags(Flags); 2742 return; 2743 } else { 2744 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID, Flags); 2745 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 2746 } 2747 } 2748 2749 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 2750 unsigned DeviceID, unsigned FileID, StringRef ParentName, 2751 unsigned LineNum) const { 2752 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 2753 if (PerDevice == OffloadEntriesTargetRegion.end()) 2754 return false; 2755 auto PerFile = PerDevice->second.find(FileID); 2756 if (PerFile == PerDevice->second.end()) 2757 return false; 2758 auto PerParentName = PerFile->second.find(ParentName); 2759 if (PerParentName == PerFile->second.end()) 2760 return false; 2761 auto PerLine = PerParentName->second.find(LineNum); 2762 if (PerLine == PerParentName->second.end()) 2763 return false; 2764 // Fail if this entry is already registered. 2765 if (PerLine->second.getAddress() || PerLine->second.getID()) 2766 return false; 2767 return true; 2768 } 2769 2770 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 2771 const OffloadTargetRegionEntryInfoActTy &Action) { 2772 // Scan all target region entries and perform the provided action. 2773 for (auto &D : OffloadEntriesTargetRegion) 2774 for (auto &F : D.second) 2775 for (auto &P : F.second) 2776 for (auto &L : P.second) 2777 Action(D.first, F.first, P.first(), L.first, L.second); 2778 } 2779 2780 /// \brief Create a Ctor/Dtor-like function whose body is emitted through 2781 /// \a Codegen. 
/// This is used to emit the two functions that register and
/// unregister the descriptor of the current compilation unit.
///
/// The created function returns void and takes a single unnamed 'void *'
/// parameter.
/// \return The function created for \p Name.
static llvm::Function *
createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
                                         const RegionCodeGenTy &Codegen) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(),
                             /*Id=*/nullptr, C.VoidPtrTy);
  Args.push_back(&DummyPtr);

  CodeGenFunction CGF(CGM);
  auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto FTy = CGM.getTypes().GetFunctionType(FI);
  auto *Fn =
      CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
  Codegen(CGF);
  CGF.FinishFunction();
  return Fn;
}

/// \brief Create the offloading binary descriptor of this compilation unit
/// and the function that registers it with the offloading runtime.
///
/// Emits one device image record per offloading target triple, the binary
/// descriptor global that refers to them, and a registration /
/// unregistration function pair; the unregistration function is registered
/// as a global destructor from within the registration function.
/// \return The registration function, or nullptr when compiling for the
/// device or when there are no offload entries.
llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {

  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  auto &M = CGM.getModule();
  auto &C = CGM.getContext();

  // Get list of devices we care about
  auto &Devices = CGM.getLangOpts().OMPTargetTriples;

  // We should be creating an offloading descriptor only if there are devices
  // specified.
  assert(!Devices.empty() && "No OpenMP offloading devices??");

  // Create the external variables that will point to the begin and end of the
  // host entries section. These will be defined by the linker.
  auto *OffloadEntryTy =
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
  llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      ".omp_offloading.entries_begin");
  llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      ".omp_offloading.entries_end");

  // Create all device images
  auto *DeviceImageTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
  ConstantInitBuilder DeviceImagesBuilder(CGM);
  auto DeviceImagesEntries = DeviceImagesBuilder.beginArray(DeviceImageTy);

  // One record per target triple. The image start/end symbols are declared
  // external here and are named after the triple.
  for (unsigned i = 0; i < Devices.size(); ++i) {
    StringRef T = Devices[i].getTriple();
    auto *ImgBegin = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
        /*Initializer=*/nullptr,
        Twine(".omp_offloading.img_start.") + Twine(T));
    auto *ImgEnd = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
        /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T));

    // Field order must match the record built by getTgtDeviceImageQTy().
    auto Dev = DeviceImagesEntries.beginStruct(DeviceImageTy);
    Dev.add(ImgBegin);
    Dev.add(ImgEnd);
    Dev.add(HostEntriesBegin);
    Dev.add(HostEntriesEnd);
    Dev.finishAndAddTo(DeviceImagesEntries);
  }

  // Create device images global array.
  llvm::GlobalVariable *DeviceImages =
      DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images",
                                                CGM.getPointerAlign(),
                                                /*isConstant=*/true);
  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // This is a Zero array to be used in the creation of the constant expressions
  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
                             llvm::Constant::getNullValue(CGM.Int32Ty)};

  // Create the target region descriptor.
  // Field order must match the record built by getTgtBinaryDescriptorQTy().
  auto *BinaryDescriptorTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
  ConstantInitBuilder DescBuilder(CGM);
  auto DescInit = DescBuilder.beginStruct(BinaryDescriptorTy);
  DescInit.addInt(CGM.Int32Ty, Devices.size());
  DescInit.add(llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
                                                    DeviceImages,
                                                    Index));
  DescInit.add(HostEntriesBegin);
  DescInit.add(HostEntriesEnd);

  auto *Desc = DescInit.finishAndCreateGlobal(".omp_offloading.descriptor",
                                              CGM.getPointerAlign(),
                                              /*isConstant=*/true);

  // Emit code to register or unregister the descriptor at execution
  // startup or closing, respectively.

  // Create a variable to drive the registration and unregistration of the
  // descriptor, so we can reuse the logic that emits Ctors and Dtors.
  auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
  ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
                                IdentInfo, C.CharTy);

  // The unregistration function is created first because the registration
  // function refers to it when registering the global destructor.
  auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
      CGM, ".omp_offloading.descriptor_unreg",
      [&](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
                             Desc);
      });
  auto *RegFn = createOffloadingBinaryDescriptorFunction(
      CGM, ".omp_offloading.descriptor_reg",
      [&](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
                             Desc);
        CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
      });
  if (CGM.supportsCOMDAT()) {
    // It is sufficient to call registration function only once, so create a
    // COMDAT group for registration/unregistration functions and associated
    // data. That would reduce startup time and code size. Registration
    // function serves as a COMDAT group key.
    auto ComdatKey = M.getOrInsertComdat(RegFn->getName());
    RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
    RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
    RegFn->setComdat(ComdatKey);
    UnRegFn->setComdat(ComdatKey);
    DeviceImages->setComdat(ComdatKey);
    Desc->setComdat(ComdatKey);
  }
  return RegFn;
}

/// \brief Emit one offload entry record into the offloading entries section.
///
/// The record holds \p ID cast to 'void *', a pointer to a string with the
/// name of \p Addr, \p Size, \p Flags and a zero reserved field. It is
/// created with 1-byte alignment and placed in section
/// ".omp_offloading.entries".
void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID,
                                         llvm::Constant *Addr, uint64_t Size,
                                         int32_t Flags) {
  StringRef Name = Addr->getName();
  auto *TgtOffloadEntryType = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
  llvm::LLVMContext &C = CGM.getModule().getContext();
  llvm::Module &M = CGM.getModule();

  // Make sure the address has the right type.
  llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  llvm::GlobalVariable *Str =
      new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
                               llvm::GlobalValue::InternalLinkage, StrPtrInit,
                               ".omp_offloading.entry_name");
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
  llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);

  // We can't have any padding between symbols, so we need to have 1-byte
  // alignment.
  auto Align = CharUnits::fromQuantity(1);

  // Create the entry struct.
  // Field order must match the record built by getTgtOffloadEntryQTy().
  ConstantInitBuilder EntryBuilder(CGM);
  auto EntryInit = EntryBuilder.beginStruct(TgtOffloadEntryType);
  EntryInit.add(AddrPtr);
  EntryInit.add(StrPtr);
  EntryInit.addInt(CGM.SizeTy, Size);
  EntryInit.addInt(CGM.Int32Ty, Flags);
  EntryInit.addInt(CGM.Int32Ty, 0);
  llvm::GlobalVariable *Entry =
      EntryInit.finishAndCreateGlobal(".omp_offloading.entry",
                                      Align,
                                      /*constant*/ true,
                                      llvm::GlobalValue::ExternalLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection(".omp_offloading.entries");
}

/// \brief Emit the offload entries and the "omp_offload.info" named metadata
/// that describes them. Does nothing if there are no offload entries.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we do not have entries, we don't need to do anything.
  if (OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // One slot per entry, indexed by the entry's order; filled in by the
  // metadata emitter below.
  SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
      OrderedEntries(OffloadEntriesInfoManager.size());

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Auxiliary methods to create metadata values and strings.
  auto getMDInt = [&](unsigned v) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
  };

  auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter = [&](
      unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
      OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
    llvm::SmallVector<llvm::Metadata *, 32> Ops;
    // Generate metadata for target regions. Each entry of this metadata
    // contains:
    // - Entry 0 -> Kind of this type of metadata (0).
    // - Entry 1 -> Device ID of the file where the entry was identified.
    // - Entry 2 -> File ID of the file where the entry was identified.
    // - Entry 3 -> Mangled name of the function where the entry was identified.
    // - Entry 4 -> Line in the file where the entry was identified.
    // - Entry 5 -> Order the entry was created.
    // The first element of the metadata node is the kind.
    Ops.push_back(getMDInt(E.getKind()));
    Ops.push_back(getMDInt(DeviceID));
    Ops.push_back(getMDInt(FileID));
    Ops.push_back(getMDString(ParentName));
    Ops.push_back(getMDInt(Line));
    Ops.push_back(getMDInt(E.getOrder()));

    // Save this entry in the right position of the ordered entries array.
    OrderedEntries[E.getOrder()] = &E;

    // Add metadata to the named metadata node.
    MD->addOperand(llvm::MDNode::get(C, Ops));
  };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Emit the entry records in creation order.
  for (auto *E : OrderedEntries) {
    assert(E && "All ordered entries must exist!");
    if (auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                E)) {
      assert(CE->getID() && CE->getAddress() &&
             "Entry ID and Addr are invalid!");
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0);
    } else
      llvm_unreachable("Unsupported entry kind.");
  }
}

/// \brief Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  // NOTE(review): a host IR file that cannot be read is silently ignored here;
  // confirm that no diagnostic is wanted.
  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (Buf.getError())
    return;

  // The host module is parsed into a separate, local LLVM context.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (ME.getError())
    return;

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (auto I : MD->operands()) {
    llvm::MDNode *MN = cast<llvm::MDNode>(I);

    // Read operand Idx of the current node as an integer constant.
    auto getMDInt = [&](unsigned Idx) {
      llvm::ConstantAsMetadata *V =
          cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    // Read operand Idx of the current node as a string.
    auto getMDString = [&](unsigned Idx) {
      llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operands follow the layout
    // written by createOffloadEntriesAndInfoMetadata().
    switch (getMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OFFLOAD_ENTRY_INFO_TARGET_REGION:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
          /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
          /*Order=*/getMDInt(5));
      break;
    }
  }
}

/// \brief Build and cache the kmp_routine_entry_t pointer type, both as a
/// QualType and as an LLVM type. Does nothing if the type was already built.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    auto &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

/// \brief Append an unnamed, public, non-mutable field of type \p FieldTy to
/// \p DC.
/// \return The created field.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

/// \brief Return the type of __tgt_offload_entry, building and caching it on
/// first use.
QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {

  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void      *addr;       // Pointer to the offload entry info.
  //                          // (function or global)
  //   char      *name;       // Name of the function or global.
  //   size_t     size;       // Size of the entry info (0 if it is a function).
  //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
  //   int32_t    reserved;   // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

/// \brief Return the type of __tgt_device_image, building and caching it on
/// first use.
QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
  // These are the types we need to build:
  // struct __tgt_device_image{
  // void   *ImageStart;       // Pointer to the target code start.
  // void   *ImageEnd;         // Pointer to the target code end.
  // // We also add the host entries to the device image, as it may be useful
  // // for the target runtime to have access to that information.
  // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
  //                                       // the entries.
  // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
  //                                       // entries (non inclusive).
  // };
  if (TgtDeviceImageQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    auto *RD = C.buildImplicitRecord("__tgt_device_image");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtDeviceImageQTy = C.getRecordType(RD);
  }
  return TgtDeviceImageQTy;
}

/// \brief Return the type of __tgt_bin_desc, building and caching it on first
/// use.
QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
  // struct __tgt_bin_desc{
  //   int32_t              NumDevices;      // Number of devices supported.
  //   __tgt_device_image   *DeviceImages;   // Arrays of device images
  //                                         // (one per device).
  //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
  //                                         // entries.
  //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
  //                                         // entries (non inclusive).
  // };
  if (TgtBinaryDescriptorQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
    RD->startDefinition();
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtBinaryDescriptorQTy = C.getRecordType(RD);
  }
  return TgtBinaryDescriptorQTy;
}

namespace {
/// \brief The three declarations that describe one private variable: the
/// original variable, its private copy and the variable used to initialize
/// private elements.
struct PrivateHelpersTy {
  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
                   const VarDecl *PrivateElemInit)
      : Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  const VarDecl *Original;
  const VarDecl *PrivateCopy;
  const VarDecl *PrivateElemInit;
};
/// \brief A private variable description paired with an alignment.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

/// \brief Build the implicit record that holds the private copies.
///
/// One field is added per element of \p Privates, typed as the non-reference
/// type of the original variable; AlignedAttr attributes of the original
/// variable are added to the field.
/// \return The record, or nullptr if \p Privates is empty.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    auto &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /* private vars */
    //       };
    auto *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (auto &&Pair : Privates) {
      auto *VD = Pair.second.Original;
      auto Type = VD->getType();
      Type = Type.getNonReferenceType();
      auto *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  auto &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //       };
  auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  auto *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64,
/*Signed=*/1); 3275 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3276 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3277 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3278 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3279 } 3280 RD->completeDefinition(); 3281 return RD; 3282 } 3283 3284 static RecordDecl * 3285 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3286 ArrayRef<PrivateDataTy> Privates) { 3287 auto &C = CGM.getContext(); 3288 // Build struct kmp_task_t_with_privates { 3289 // kmp_task_t task_data; 3290 // .kmp_privates_t. privates; 3291 // }; 3292 auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3293 RD->startDefinition(); 3294 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3295 if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) { 3296 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3297 } 3298 RD->completeDefinition(); 3299 return RD; 3300 } 3301 3302 /// \brief Emit a proxy function which accepts kmp_task_t as the second 3303 /// argument. 
3304 /// \code 3305 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3306 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3307 /// For taskloops: 3308 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3309 /// tt->shareds); 3310 /// return 0; 3311 /// } 3312 /// \endcode 3313 static llvm::Value * 3314 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3315 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3316 QualType KmpTaskTWithPrivatesPtrQTy, 3317 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3318 QualType SharedsPtrTy, llvm::Value *TaskFunction, 3319 llvm::Value *TaskPrivatesMap) { 3320 auto &C = CGM.getContext(); 3321 FunctionArgList Args; 3322 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); 3323 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, 3324 /*Id=*/nullptr, 3325 KmpTaskTWithPrivatesPtrQTy.withRestrict()); 3326 Args.push_back(&GtidArg); 3327 Args.push_back(&TaskTypeArg); 3328 auto &TaskEntryFnInfo = 3329 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3330 auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3331 auto *TaskEntry = 3332 llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage, 3333 ".omp_task_entry.", &CGM.getModule()); 3334 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo); 3335 CodeGenFunction CGF(CGM); 3336 CGF.disableDebugInfo(); 3337 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); 3338 3339 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3340 // tt, 3341 // For taskloops: 3342 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3343 // tt->task_data.shareds); 3344 auto *GtidParam = CGF.EmitLoadOfScalar( 3345 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3346 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3347 
CGF.GetAddrOfLocalVar(&TaskTypeArg), 3348 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3349 auto *KmpTaskTWithPrivatesQTyRD = 3350 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3351 LValue Base = 3352 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3353 auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3354 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3355 auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3356 auto *PartidParam = PartIdLVal.getPointer(); 3357 3358 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3359 auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3360 auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3361 CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(), 3362 CGF.ConvertTypeForMem(SharedsPtrTy)); 3363 3364 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3365 llvm::Value *PrivatesParam; 3366 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3367 auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3368 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3369 PrivatesLVal.getPointer(), CGF.VoidPtrTy); 3370 } else 3371 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3372 3373 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3374 TaskPrivatesMap, 3375 CGF.Builder 3376 .CreatePointerBitCastOrAddrSpaceCast( 3377 TDBase.getAddress(), CGF.VoidPtrTy) 3378 .getPointer()}; 3379 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3380 std::end(CommonArgs)); 3381 if (isOpenMPTaskLoopDirective(Kind)) { 3382 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3383 auto LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3384 auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal(); 3385 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3386 auto 
UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3387 auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal(); 3388 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3389 auto StLVal = CGF.EmitLValueForField(Base, *StFI); 3390 auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal(); 3391 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3392 auto LILVal = CGF.EmitLValueForField(Base, *LIFI); 3393 auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal(); 3394 CallArgs.push_back(LBParam); 3395 CallArgs.push_back(UBParam); 3396 CallArgs.push_back(StParam); 3397 CallArgs.push_back(LIParam); 3398 } 3399 CallArgs.push_back(SharedsParam); 3400 3401 CGF.EmitCallOrInvoke(TaskFunction, CallArgs); 3402 CGF.EmitStoreThroughLValue( 3403 RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3404 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3405 CGF.FinishFunction(); 3406 return TaskEntry; 3407 } 3408 3409 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3410 SourceLocation Loc, 3411 QualType KmpInt32Ty, 3412 QualType KmpTaskTWithPrivatesPtrQTy, 3413 QualType KmpTaskTWithPrivatesQTy) { 3414 auto &C = CGM.getContext(); 3415 FunctionArgList Args; 3416 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); 3417 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, 3418 /*Id=*/nullptr, 3419 KmpTaskTWithPrivatesPtrQTy.withRestrict()); 3420 Args.push_back(&GtidArg); 3421 Args.push_back(&TaskTypeArg); 3422 FunctionType::ExtInfo Info; 3423 auto &DestructorFnInfo = 3424 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3425 auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo); 3426 auto *DestructorFn = 3427 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3428 ".omp_task_destructor.", &CGM.getModule()); 3429 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn, 3430 DestructorFnInfo); 3431 CodeGenFunction CGF(CGM); 3432 
CGF.disableDebugInfo(); 3433 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3434 Args); 3435 3436 LValue Base = CGF.EmitLoadOfPointerLValue( 3437 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3438 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3439 auto *KmpTaskTWithPrivatesQTyRD = 3440 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3441 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3442 Base = CGF.EmitLValueForField(Base, *FI); 3443 for (auto *Field : 3444 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3445 if (auto DtorKind = Field->getType().isDestructedType()) { 3446 auto FieldLValue = CGF.EmitLValueForField(Base, Field); 3447 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 3448 } 3449 } 3450 CGF.FinishFunction(); 3451 return DestructorFn; 3452 } 3453 3454 /// \brief Emit a privates mapping function for correct handling of private and 3455 /// firstprivate variables. 3456 /// \code 3457 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 3458 /// **noalias priv1,..., <tyn> **noalias privn) { 3459 /// *priv1 = &.privates.priv1; 3460 /// ...; 3461 /// *privn = &.privates.privn; 3462 /// } 3463 /// \endcode 3464 static llvm::Value * 3465 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3466 ArrayRef<const Expr *> PrivateVars, 3467 ArrayRef<const Expr *> FirstprivateVars, 3468 ArrayRef<const Expr *> LastprivateVars, 3469 QualType PrivatesQTy, 3470 ArrayRef<PrivateDataTy> Privates) { 3471 auto &C = CGM.getContext(); 3472 FunctionArgList Args; 3473 ImplicitParamDecl TaskPrivatesArg( 3474 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3475 C.getPointerType(PrivatesQTy).withConst().withRestrict()); 3476 Args.push_back(&TaskPrivatesArg); 3477 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 3478 unsigned Counter = 1; 3479 for (auto *E: PrivateVars) { 3480 Args.push_back(ImplicitParamDecl::Create( 3481 C, /*DC=*/nullptr, Loc, 3482 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 3483 .withConst() 3484 .withRestrict())); 3485 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3486 PrivateVarsPos[VD] = Counter; 3487 ++Counter; 3488 } 3489 for (auto *E : FirstprivateVars) { 3490 Args.push_back(ImplicitParamDecl::Create( 3491 C, /*DC=*/nullptr, Loc, 3492 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 3493 .withConst() 3494 .withRestrict())); 3495 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3496 PrivateVarsPos[VD] = Counter; 3497 ++Counter; 3498 } 3499 for (auto *E: LastprivateVars) { 3500 Args.push_back(ImplicitParamDecl::Create( 3501 C, /*DC=*/nullptr, Loc, 3502 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 3503 .withConst() 3504 .withRestrict())); 3505 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3506 PrivateVarsPos[VD] = Counter; 3507 ++Counter; 3508 } 3509 auto &TaskPrivatesMapFnInfo = 3510 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3511 auto 
*TaskPrivatesMapTy = 3512 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3513 auto *TaskPrivatesMap = llvm::Function::Create( 3514 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, 3515 ".omp_task_privates_map.", &CGM.getModule()); 3516 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap, 3517 TaskPrivatesMapFnInfo); 3518 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3519 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3520 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3521 CodeGenFunction CGF(CGM); 3522 CGF.disableDebugInfo(); 3523 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3524 TaskPrivatesMapFnInfo, Args); 3525 3526 // *privi = &.privates.privi; 3527 LValue Base = CGF.EmitLoadOfPointerLValue( 3528 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3529 TaskPrivatesArg.getType()->castAs<PointerType>()); 3530 auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3531 Counter = 0; 3532 for (auto *Field : PrivatesQTyRD->fields()) { 3533 auto FieldLVal = CGF.EmitLValueForField(Base, Field); 3534 auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3535 auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3536 auto RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3537 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 3538 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 3539 ++Counter; 3540 } 3541 CGF.FinishFunction(); 3542 return TaskPrivatesMap; 3543 } 3544 3545 static int array_pod_sort_comparator(const PrivateDataTy *P1, 3546 const PrivateDataTy *P2) { 3547 return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0); 3548 } 3549 3550 /// Emit initialization for private variables in task-based directives. 
3551 static void emitPrivatesInit(CodeGenFunction &CGF, 3552 const OMPExecutableDirective &D, 3553 Address KmpTaskSharedsPtr, LValue TDBase, 3554 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3555 QualType SharedsTy, QualType SharedsPtrTy, 3556 const OMPTaskDataTy &Data, 3557 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3558 auto &C = CGF.getContext(); 3559 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3560 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3561 LValue SrcBase; 3562 if (!Data.FirstprivateVars.empty()) { 3563 SrcBase = CGF.MakeAddrLValue( 3564 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3565 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3566 SharedsTy); 3567 } 3568 CodeGenFunction::CGCapturedStmtInfo CapturesInfo( 3569 cast<CapturedStmt>(*D.getAssociatedStmt())); 3570 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3571 for (auto &&Pair : Privates) { 3572 auto *VD = Pair.second.PrivateCopy; 3573 auto *Init = VD->getAnyInitializer(); 3574 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3575 !CGF.isTrivialInitializer(Init)))) { 3576 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3577 if (auto *Elem = Pair.second.PrivateElemInit) { 3578 auto *OriginalVD = Pair.second.Original; 3579 auto *SharedField = CapturesInfo.lookup(OriginalVD); 3580 auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3581 SharedRefLValue = CGF.MakeAddrLValue( 3582 Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), 3583 SharedRefLValue.getType(), 3584 LValueBaseInfo(AlignmentSource::Decl, 3585 SharedRefLValue.getBaseInfo().getMayAlias())); 3586 QualType Type = OriginalVD->getType(); 3587 if (Type->isArrayType()) { 3588 // Initialize firstprivate array. 3589 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3590 // Perform simple memcpy. 
3591 CGF.EmitAggregateAssign(PrivateLValue.getAddress(), 3592 SharedRefLValue.getAddress(), Type); 3593 } else { 3594 // Initialize firstprivate array using element-by-element 3595 // initialization. 3596 CGF.EmitOMPAggregateAssign( 3597 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, 3598 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3599 Address SrcElement) { 3600 // Clean up any temporaries needed by the initialization. 3601 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3602 InitScope.addPrivate( 3603 Elem, [SrcElement]() -> Address { return SrcElement; }); 3604 (void)InitScope.Privatize(); 3605 // Emit initialization for single element. 3606 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3607 CGF, &CapturesInfo); 3608 CGF.EmitAnyExprToMem(Init, DestElement, 3609 Init->getType().getQualifiers(), 3610 /*IsInitializer=*/false); 3611 }); 3612 } 3613 } else { 3614 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3615 InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { 3616 return SharedRefLValue.getAddress(); 3617 }); 3618 (void)InitScope.Privatize(); 3619 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3620 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3621 /*capturedByInit=*/false); 3622 } 3623 } else 3624 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3625 } 3626 ++FI; 3627 } 3628 } 3629 3630 /// Check if duplication function is required for taskloops. 
3631 static bool checkInitIsRequired(CodeGenFunction &CGF, 3632 ArrayRef<PrivateDataTy> Privates) { 3633 bool InitRequired = false; 3634 for (auto &&Pair : Privates) { 3635 auto *VD = Pair.second.PrivateCopy; 3636 auto *Init = VD->getAnyInitializer(); 3637 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3638 !CGF.isTrivialInitializer(Init)); 3639 } 3640 return InitRequired; 3641 } 3642 3643 3644 /// Emit task_dup function (for initialization of 3645 /// private/firstprivate/lastprivate vars and last_iter flag) 3646 /// \code 3647 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3648 /// lastpriv) { 3649 /// // setup lastprivate flag 3650 /// task_dst->last = lastpriv; 3651 /// // could be constructor calls here... 3652 /// } 3653 /// \endcode 3654 static llvm::Value * 3655 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3656 const OMPExecutableDirective &D, 3657 QualType KmpTaskTWithPrivatesPtrQTy, 3658 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3659 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3660 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3661 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3662 auto &C = CGM.getContext(); 3663 FunctionArgList Args; 3664 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, 3665 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); 3666 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, 3667 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); 3668 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, 3669 /*Id=*/nullptr, C.IntTy); 3670 Args.push_back(&DstArg); 3671 Args.push_back(&SrcArg); 3672 Args.push_back(&LastprivArg); 3673 auto &TaskDupFnInfo = 3674 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3675 auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3676 auto *TaskDup = 3677 llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage, 3678 ".omp_task_dup.", &CGM.getModule()); 3679 
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo); 3680 CodeGenFunction CGF(CGM); 3681 CGF.disableDebugInfo(); 3682 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args); 3683 3684 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3685 CGF.GetAddrOfLocalVar(&DstArg), 3686 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3687 // task_dst->liter = lastpriv; 3688 if (WithLastIter) { 3689 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3690 LValue Base = CGF.EmitLValueForField( 3691 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3692 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3693 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3694 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3695 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3696 } 3697 3698 // Emit initial values for private copies (if any). 3699 assert(!Privates.empty()); 3700 Address KmpTaskSharedsPtr = Address::invalid(); 3701 if (!Data.FirstprivateVars.empty()) { 3702 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3703 CGF.GetAddrOfLocalVar(&SrcArg), 3704 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3705 LValue Base = CGF.EmitLValueForField( 3706 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3707 KmpTaskSharedsPtr = Address( 3708 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 3709 Base, *std::next(KmpTaskTQTyRD->field_begin(), 3710 KmpTaskTShareds)), 3711 Loc), 3712 CGF.getNaturalTypeAlignment(SharedsTy)); 3713 } 3714 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 3715 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 3716 CGF.FinishFunction(); 3717 return TaskDup; 3718 } 3719 3720 /// Checks if destructor function is required to be generated. 3721 /// \return true if cleanups are required, false otherwise. 
3722 static bool 3723 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 3724 bool NeedsCleanup = false; 3725 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3726 auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 3727 for (auto *FD : PrivateRD->fields()) { 3728 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 3729 if (NeedsCleanup) 3730 break; 3731 } 3732 return NeedsCleanup; 3733 } 3734 3735 CGOpenMPRuntime::TaskResultTy 3736 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 3737 const OMPExecutableDirective &D, 3738 llvm::Value *TaskFunction, QualType SharedsTy, 3739 Address Shareds, const OMPTaskDataTy &Data) { 3740 auto &C = CGM.getContext(); 3741 llvm::SmallVector<PrivateDataTy, 4> Privates; 3742 // Aggregate privates and sort them by the alignment. 3743 auto I = Data.PrivateCopies.begin(); 3744 for (auto *E : Data.PrivateVars) { 3745 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3746 Privates.push_back(std::make_pair( 3747 C.getDeclAlign(VD), 3748 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 3749 /*PrivateElemInit=*/nullptr))); 3750 ++I; 3751 } 3752 I = Data.FirstprivateCopies.begin(); 3753 auto IElemInitRef = Data.FirstprivateInits.begin(); 3754 for (auto *E : Data.FirstprivateVars) { 3755 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3756 Privates.push_back(std::make_pair( 3757 C.getDeclAlign(VD), 3758 PrivateHelpersTy( 3759 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 3760 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())))); 3761 ++I; 3762 ++IElemInitRef; 3763 } 3764 I = Data.LastprivateCopies.begin(); 3765 for (auto *E : Data.LastprivateVars) { 3766 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3767 Privates.push_back(std::make_pair( 3768 C.getDeclAlign(VD), 3769 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 3770 /*PrivateElemInit=*/nullptr))); 3771 ++I; 3772 } 
3773 llvm::array_pod_sort(Privates.begin(), Privates.end(), 3774 array_pod_sort_comparator); 3775 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 3776 // Build type kmp_routine_entry_t (if not built yet). 3777 emitKmpRoutineEntryT(KmpInt32Ty); 3778 // Build type kmp_task_t (if not built yet). 3779 if (KmpTaskTQTy.isNull()) { 3780 KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 3781 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 3782 } 3783 auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3784 // Build particular struct kmp_task_t for the given task. 3785 auto *KmpTaskTWithPrivatesQTyRD = 3786 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 3787 auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 3788 QualType KmpTaskTWithPrivatesPtrQTy = 3789 C.getPointerType(KmpTaskTWithPrivatesQTy); 3790 auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 3791 auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo(); 3792 auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 3793 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 3794 3795 // Emit initial values for private copies (if any). 
3796 llvm::Value *TaskPrivatesMap = nullptr; 3797 auto *TaskPrivatesMapTy = 3798 std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType(); 3799 if (!Privates.empty()) { 3800 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3801 TaskPrivatesMap = emitTaskPrivateMappingFunction( 3802 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 3803 FI->getType(), Privates); 3804 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3805 TaskPrivatesMap, TaskPrivatesMapTy); 3806 } else { 3807 TaskPrivatesMap = llvm::ConstantPointerNull::get( 3808 cast<llvm::PointerType>(TaskPrivatesMapTy)); 3809 } 3810 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 3811 // kmp_task_t *tt); 3812 auto *TaskEntry = emitProxyTaskFunction( 3813 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 3814 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 3815 TaskPrivatesMap); 3816 3817 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 3818 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 3819 // kmp_routine_entry_t *task_entry); 3820 // Task flags. Format is taken from 3821 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h, 3822 // description of kmp_tasking_flags struct. 3823 enum { 3824 TiedFlag = 0x1, 3825 FinalFlag = 0x2, 3826 DestructorsFlag = 0x8, 3827 PriorityFlag = 0x20 3828 }; 3829 unsigned Flags = Data.Tied ? TiedFlag : 0; 3830 bool NeedsCleanup = false; 3831 if (!Privates.empty()) { 3832 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 3833 if (NeedsCleanup) 3834 Flags = Flags | DestructorsFlag; 3835 } 3836 if (Data.Priority.getInt()) 3837 Flags = Flags | PriorityFlag; 3838 auto *TaskFlags = 3839 Data.Final.getPointer() 3840 ? 
CGF.Builder.CreateSelect(Data.Final.getPointer(), 3841 CGF.Builder.getInt32(FinalFlag), 3842 CGF.Builder.getInt32(/*C=*/0)) 3843 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 3844 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 3845 auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 3846 llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), 3847 getThreadID(CGF, Loc), TaskFlags, 3848 KmpTaskTWithPrivatesTySize, SharedsSize, 3849 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3850 TaskEntry, KmpRoutineEntryPtrTy)}; 3851 auto *NewTask = CGF.EmitRuntimeCall( 3852 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 3853 auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3854 NewTask, KmpTaskTWithPrivatesPtrTy); 3855 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 3856 KmpTaskTWithPrivatesQTy); 3857 LValue TDBase = 3858 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3859 // Fill the data in the resulting kmp_task_t record. 3860 // Copy shareds if there are any. 3861 Address KmpTaskSharedsPtr = Address::invalid(); 3862 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 3863 KmpTaskSharedsPtr = 3864 Address(CGF.EmitLoadOfScalar( 3865 CGF.EmitLValueForField( 3866 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 3867 KmpTaskTShareds)), 3868 Loc), 3869 CGF.getNaturalTypeAlignment(SharedsTy)); 3870 CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy); 3871 } 3872 // Emit initial values for private copies (if any). 
3873 TaskResultTy Result; 3874 if (!Privates.empty()) { 3875 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 3876 SharedsTy, SharedsPtrTy, Data, Privates, 3877 /*ForDup=*/false); 3878 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 3879 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 3880 Result.TaskDupFn = emitTaskDupFunction( 3881 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 3882 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 3883 /*WithLastIter=*/!Data.LastprivateVars.empty()); 3884 } 3885 } 3886 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 3887 enum { Priority = 0, Destructors = 1 }; 3888 // Provide pointer to function with destructors for privates. 3889 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 3890 auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl(); 3891 if (NeedsCleanup) { 3892 llvm::Value *DestructorFn = emitDestructorsFunction( 3893 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 3894 KmpTaskTWithPrivatesQTy); 3895 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 3896 LValue DestructorsLV = CGF.EmitLValueForField( 3897 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 3898 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3899 DestructorFn, KmpRoutineEntryPtrTy), 3900 DestructorsLV); 3901 } 3902 // Set priority. 
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

/// Emit the code that launches the task object prepared by emitTaskInit().
///
/// When \p Data lists dependences, a local kmp_depend_info array is filled
/// (base address, length and in/inout flag per dependence). Two code paths are
/// then prepared:
///  - "then": call __kmpc_omp_task, or __kmpc_omp_task_with_deps when there
///    are dependences;
///  - "else": call __kmpc_omp_wait_deps when there are dependences, then call
///    the task entry directly, using __kmpc_omp_task_begin_if0 and
///    __kmpc_omp_task_complete_if0 as the region action.
/// With \p IfCond both paths are handed to emitOMPIfClause(); without it only
/// the "then" path is emitted. Nothing is emitted if \p CGF has no insertion
/// point.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Value *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Value *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  auto &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
    // Field order of kmp_depend_info as built below.
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    // The flags field is an unsigned integer as wide as 'bool'.
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Build the implicit kmp_depend_info record once and cache its type in
    // KmpDependInfoTy; later calls reuse the cached declaration.
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned i = 0; i < NumDependencies; ++i) {
      const Expr *E = Data.Dependences[i].second;
      auto Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: the length is the byte distance from the section's
        // first element to one element past the address produced with
        // LowerBound=false.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else
        Size = CGF.getTypeSize(Ty);
      auto Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      auto BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      auto LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[i].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      auto FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // From here on the array is referred to through a void* to its start.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
        CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  auto *ThreadID = getThreadID(CGF, Loc);
  auto *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Only filled in (and only read by ThenCodeGen) when there are dependences.
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    // No "noalias" dependences are passed: count 0 and a null list.
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
                        &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied task: store 0 into the part_id field of the task descriptor.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // Only filled in (and only read by ElseCodeGen) when there are dependences.
  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF,
                                                           PrePostActionTy &) {
    auto &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy](
        CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    // NOTE(review): CommonActionTy presumably emits the first callee on region
    // entry and the second on exit -- confirm against its definition.
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond)
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

/// Emit the __kmpc_taskloop call for a taskloop directive: initialize the task
/// through emitTaskInit(), fill the bound and stride fields of the task
/// descriptor, and pass if/nogroup/schedule information to the runtime.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Value *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // if_val: the if-clause condition widened to int, or constant 1 when the
  // directive has no if clause.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);

  // Store the initializers of the directive's lower-bound, upper-bound and
  // stride helper variables into the matching fields of the task descriptor.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Values of the 'sched' argument. Data.Schedule carries the schedule
  // expression (pointer part) and a flag (int part) that selects NumTasks over
  // Grainsize; a null pointer means no schedule clause.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(),
      UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
      llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn
          ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn,
                                                            CGF.VoidPtrTy)
          : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}

/// \brief Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
///
/// The emitted loop walks both arrays in lock step. For each iteration
/// \p LHSVar and \p RHSVar are privatized to the current elements before
/// \p RedOpGen is invoked, so the generator sees one element pair at a time.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr Forwarded unchanged to \p RedOpGen.
/// \param EExpr Forwarded unchanged to \p RedOpGen.
/// \param UpExpr Forwarded unchanged to \p RedOpGen.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform the reduction element by element.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  auto RHSBegin = RHSAddr.getPointer();
  auto LHSBegin = LHSAddr.getPointer();
  // LHSEnd is LHSBegin advanced by NumElements; it is the loop's end marker.
  auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for an empty array.
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // One PHI per array tracks the current element. The second incoming value
  // (the advanced pointer) is added once the loop body has been emitted.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the reduction operation with LHSVar/RHSVar remapped to the current
  // elements.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
4238 auto LHSElementNext = CGF.Builder.CreateConstGEP1_32( 4239 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 4240 auto RHSElementNext = CGF.Builder.CreateConstGEP1_32( 4241 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 4242 // Check whether we've reached the end. 4243 auto Done = 4244 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 4245 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 4246 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 4247 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 4248 4249 // Done. 4250 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 4251 } 4252 4253 /// Emit reduction combiner. If the combiner is a simple expression emit it as 4254 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 4255 /// UDR combiner function. 4256 static void emitReductionCombiner(CodeGenFunction &CGF, 4257 const Expr *ReductionOp) { 4258 if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) 4259 if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 4260 if (auto *DRE = 4261 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 4262 if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 4263 std::pair<llvm::Function *, llvm::Function *> Reduction = 4264 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 4265 RValue Func = RValue::get(Reduction.first); 4266 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 4267 CGF.EmitIgnoredExpr(ReductionOp); 4268 return; 4269 } 4270 CGF.EmitIgnoredExpr(ReductionOp); 4271 } 4272 4273 llvm::Value *CGOpenMPRuntime::emitReductionFunction( 4274 CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 4275 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 4276 ArrayRef<const Expr *> ReductionOps) { 4277 auto &C = CGM.getContext(); 4278 4279 // void reduction_func(void *LHSArg, void *RHSArg); 4280 FunctionArgList Args; 4281 ImplicitParamDecl LHSArg(C, 
                           /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *Fn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
      ".omp.reduction.reduction_func", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
  // The helper is emitted with its own CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  // Idx is the slot in the void* lists. It runs ahead of I because a
  // variably-modified item takes one extra slot.
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    // Remap the RHS/LHS helper variables to the addresses stored in slot Idx
    // of the respective list.
    auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&]() -> Address {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&]() -> Address {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The next slot of the LHS list holds the size encoded as a pointer.
      // It is converted back to an integer and bound to the VLA's size
      // expression.
      ++Idx;
      Address Elem =
          CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy);
      auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Second pass: emit the combiner for every reduction item.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (auto *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit the combiner \p ReductionOp for a single reduction item. Items whose
/// private copy \p PrivateRef has array type are reduced element by element
/// over the variables named by \p LHS and \p RHS; anything else is emitted
/// directly.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
}

/// Emit the code that combines the private reduction copies into the original
/// variables. With Options.SimpleReduction only the plain combiners are
/// emitted; otherwise the __kmpc_reduce{_nowait} protocol sketched in the
/// comment below is generated. Nothing is emitted if \p CGF has no insertion
/// point.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  auto &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (auto *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // Size is the number of list slots: one per item plus one extra for every
  // variably-modified item.
  auto Size = RHSExprs.size();
  for (auto *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem =
      CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The size is stored in the following slot, encoded as a pointer.
      // Note: this local 'Size' shadows the slot count declared above.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
                                             CGF.getPointerSize());
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .first,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  auto *ReductionFn = emitReductionFunction(
      CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  auto *Lock = getCriticalRegionLock(".reduction");

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  auto *ThreadId = getThreadID(CGF, Loc);
  auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  // NOTE(review): <n> is RHSExprs.size(), i.e. it does not count the extra
  // VLA-size slots included in sizeof(RedList) -- confirm this is what the
  // runtime expects.
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  auto Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (auto *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // No enter callee (nullptr / llvm::None); only the end-reduce callee is set.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (auto *E : ReductionOps) {
      // Decompose "x = <update>" into its parts. XExpr stays null when the
      // combiner is not a plain assignment, which selects the critical-region
      // fallback further down.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Note: the 'BO' declared in this condition shadows the opcode variable
      // above for the extent of the if statement.
      if (auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      auto *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          // The callback re-evaluates UpExpr with VD bound to a temporary that
          // holds the value passed in as XRValue.
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() -> Address {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          auto &RT = CGF.CGM.getOpenMPRuntime();
          RT.emitCriticalRegion(
              CGF, ".atomic_reduction",
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else
          CritRedGen(CGF, nullptr, nullptr, nullptr);
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else
    AtomicRCG(CGF);

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Emit a call to __kmpc_omp_taskwait followed, inside an OpenMP region, by
/// that region's untied-task switch.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Ignore return result until untied tasks are supported.
4683 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 4684 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 4685 Region->emitUntiedSwitch(CGF); 4686 } 4687 4688 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 4689 OpenMPDirectiveKind InnerKind, 4690 const RegionCodeGenTy &CodeGen, 4691 bool HasCancel) { 4692 if (!CGF.HaveInsertPoint()) 4693 return; 4694 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 4695 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 4696 } 4697 4698 namespace { 4699 enum RTCancelKind { 4700 CancelNoreq = 0, 4701 CancelParallel = 1, 4702 CancelLoop = 2, 4703 CancelSections = 3, 4704 CancelTaskgroup = 4 4705 }; 4706 } // anonymous namespace 4707 4708 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 4709 RTCancelKind CancelKind = CancelNoreq; 4710 if (CancelRegion == OMPD_parallel) 4711 CancelKind = CancelParallel; 4712 else if (CancelRegion == OMPD_for) 4713 CancelKind = CancelLoop; 4714 else if (CancelRegion == OMPD_sections) 4715 CancelKind = CancelSections; 4716 else { 4717 assert(CancelRegion == OMPD_taskgroup); 4718 CancelKind = CancelTaskgroup; 4719 } 4720 return CancelKind; 4721 } 4722 4723 void CGOpenMPRuntime::emitCancellationPointCall( 4724 CodeGenFunction &CGF, SourceLocation Loc, 4725 OpenMPDirectiveKind CancelRegion) { 4726 if (!CGF.HaveInsertPoint()) 4727 return; 4728 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 4729 // global_tid, kmp_int32 cncl_kind); 4730 if (auto *OMPRegionInfo = 4731 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 4732 // For 'cancellation point taskgroup', the task region info may not have a 4733 // cancel. This may instead happen in another adjacent task. 
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The call's result is tested below: a non-zero value branches to the
      // cancel exit block.
      auto *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
      auto *ContBB = CGF.createBasicBlock(".cancel.continue");
      auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      auto CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

/// Emit a call to __kmpc_cancel for \p CancelRegion and a branch to the
/// enclosing region's cancel destination when the call returns non-zero.
///
/// Nothing is emitted when \p CGF has no insertion point or when the current
/// captured-statement info is not a CGOpenMPRegionInfo. With \p IfCond the
/// call is handed to emitOMPIfClause() as the "then" generator and the "else"
/// generator is empty.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      auto &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The call's result is tested below: a non-zero value branches to the
      // cancel exit block.
      auto *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
      auto *ContBB = CGF.createBasicBlock(".cancel.continue");
      auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      auto CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond)
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

/// \brief Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
4803 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 4804 unsigned &DeviceID, unsigned &FileID, 4805 unsigned &LineNum) { 4806 4807 auto &SM = C.getSourceManager(); 4808 4809 // The loc should be always valid and have a file ID (the user cannot use 4810 // #pragma directives in macros) 4811 4812 assert(Loc.isValid() && "Source location is expected to be always valid."); 4813 assert(Loc.isFileID() && "Source location is expected to refer to a file."); 4814 4815 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 4816 assert(PLoc.isValid() && "Source location is expected to be always valid."); 4817 4818 llvm::sys::fs::UniqueID ID; 4819 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 4820 llvm_unreachable("Source file with target region no longer exists!"); 4821 4822 DeviceID = ID.getDevice(); 4823 FileID = ID.getFile(); 4824 LineNum = PLoc.getLine(); 4825 } 4826 4827 void CGOpenMPRuntime::emitTargetOutlinedFunction( 4828 const OMPExecutableDirective &D, StringRef ParentName, 4829 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 4830 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 4831 assert(!ParentName.empty() && "Invalid target region parent name!"); 4832 4833 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 4834 IsOffloadEntry, CodeGen); 4835 } 4836 4837 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 4838 const OMPExecutableDirective &D, StringRef ParentName, 4839 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 4840 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 4841 // Create a unique name for the entry function using the source location 4842 // information of the current target region. 
The name will be something like: 4843 // 4844 // __omp_offloading_DD_FFFF_PP_lBB 4845 // 4846 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 4847 // mangled name of the function that encloses the target region and BB is the 4848 // line number of the target region. 4849 4850 unsigned DeviceID; 4851 unsigned FileID; 4852 unsigned Line; 4853 getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID, 4854 Line); 4855 SmallString<64> EntryFnName; 4856 { 4857 llvm::raw_svector_ostream OS(EntryFnName); 4858 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 4859 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 4860 } 4861 4862 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 4863 4864 CodeGenFunction CGF(CGM, true); 4865 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 4866 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 4867 4868 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); 4869 4870 // If this target outline function is not an offload entry, we don't need to 4871 // register it. 4872 if (!IsOffloadEntry) 4873 return; 4874 4875 // The target region ID is used by the runtime library to identify the current 4876 // target region, so it only has to be unique and not necessarily point to 4877 // anything. It could be the pointer to the outlined function that implements 4878 // the target region, but we aren't using that so that the compiler doesn't 4879 // need to keep that, and could therefore inline the host function if proven 4880 // worthwhile during optimization. In the other hand, if emitting code for the 4881 // device, the ID has to be the function address so that it can retrieved from 4882 // the offloading entry and launched by the runtime library. We also mark the 4883 // outlined function to have external linkage in case we are emitting code for 4884 // the device, because these functions will be entry points to the device. 
4885 4886 if (CGM.getLangOpts().OpenMPIsDevice) { 4887 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 4888 OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage); 4889 } else 4890 OutlinedFnID = new llvm::GlobalVariable( 4891 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 4892 llvm::GlobalValue::PrivateLinkage, 4893 llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id"); 4894 4895 // Register the information for the entry associated with this target region. 4896 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 4897 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 4898 /*Flags=*/0); 4899 } 4900 4901 /// discard all CompoundStmts intervening between two constructs 4902 static const Stmt *ignoreCompoundStmts(const Stmt *Body) { 4903 while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body)) 4904 Body = CS->body_front(); 4905 4906 return Body; 4907 } 4908 4909 /// Emit the number of teams for a target directive. Inspect the num_teams 4910 /// clause associated with a teams construct combined or closely nested 4911 /// with the target directive. 4912 /// 4913 /// Emit a team of size one for directives such as 'target parallel' that 4914 /// have no associated teams construct. 4915 /// 4916 /// Otherwise, return nullptr. 4917 static llvm::Value * 4918 emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, 4919 CodeGenFunction &CGF, 4920 const OMPExecutableDirective &D) { 4921 4922 assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " 4923 "teams directive expected to be " 4924 "emitted only for the host!"); 4925 4926 auto &Bld = CGF.Builder; 4927 4928 // If the target directive is combined with a teams directive: 4929 // Return the value in the num_teams clause, if any. 4930 // Otherwise, return 0 to denote the runtime default. 
4931 if (isOpenMPTeamsDirective(D.getDirectiveKind())) { 4932 if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) { 4933 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 4934 auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(), 4935 /*IgnoreResultAssign*/ true); 4936 return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, 4937 /*IsSigned=*/true); 4938 } 4939 4940 // The default value is 0. 4941 return Bld.getInt32(0); 4942 } 4943 4944 // If the target directive is combined with a parallel directive but not a 4945 // teams directive, start one team. 4946 if (isOpenMPParallelDirective(D.getDirectiveKind())) 4947 return Bld.getInt32(1); 4948 4949 // If the current target region has a teams region enclosed, we need to get 4950 // the number of teams to pass to the runtime function call. This is done 4951 // by generating the expression in a inlined region. This is required because 4952 // the expression is captured in the enclosing target environment when the 4953 // teams directive is not combined with target. 4954 4955 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 4956 4957 // FIXME: Accommodate other combined directives with teams when they become 4958 // available. 4959 if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>( 4960 ignoreCompoundStmts(CS.getCapturedStmt()))) { 4961 if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { 4962 CGOpenMPInnerExprInfo CGInfo(CGF, CS); 4963 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 4964 llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); 4965 return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, 4966 /*IsSigned=*/true); 4967 } 4968 4969 // If we have an enclosed teams directive but no num_teams clause we use 4970 // the default value 0. 4971 return Bld.getInt32(0); 4972 } 4973 4974 // No teams associated with the directive. 4975 return nullptr; 4976 } 4977 4978 /// Emit the number of threads for a target directive. 
Inspect the 4979 /// thread_limit clause associated with a teams construct combined or closely 4980 /// nested with the target directive. 4981 /// 4982 /// Emit the num_threads clause for directives such as 'target parallel' that 4983 /// have no associated teams construct. 4984 /// 4985 /// Otherwise, return nullptr. 4986 static llvm::Value * 4987 emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, 4988 CodeGenFunction &CGF, 4989 const OMPExecutableDirective &D) { 4990 4991 assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " 4992 "teams directive expected to be " 4993 "emitted only for the host!"); 4994 4995 auto &Bld = CGF.Builder; 4996 4997 // 4998 // If the target directive is combined with a teams directive: 4999 // Return the value in the thread_limit clause, if any. 5000 // 5001 // If the target directive is combined with a parallel directive: 5002 // Return the value in the num_threads clause, if any. 5003 // 5004 // If both clauses are set, select the minimum of the two. 5005 // 5006 // If neither teams or parallel combined directives set the number of threads 5007 // in a team, return 0 to denote the runtime default. 5008 // 5009 // If this is not a teams directive return nullptr. 
5010 5011 if (isOpenMPTeamsDirective(D.getDirectiveKind()) || 5012 isOpenMPParallelDirective(D.getDirectiveKind())) { 5013 llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0); 5014 llvm::Value *NumThreadsVal = nullptr; 5015 llvm::Value *ThreadLimitVal = nullptr; 5016 5017 if (const auto *ThreadLimitClause = 5018 D.getSingleClause<OMPThreadLimitClause>()) { 5019 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 5020 auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(), 5021 /*IgnoreResultAssign*/ true); 5022 ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, 5023 /*IsSigned=*/true); 5024 } 5025 5026 if (const auto *NumThreadsClause = 5027 D.getSingleClause<OMPNumThreadsClause>()) { 5028 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 5029 llvm::Value *NumThreads = 5030 CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), 5031 /*IgnoreResultAssign*/ true); 5032 NumThreadsVal = 5033 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true); 5034 } 5035 5036 // Select the lesser of thread_limit and num_threads. 5037 if (NumThreadsVal) 5038 ThreadLimitVal = ThreadLimitVal 5039 ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal, 5040 ThreadLimitVal), 5041 NumThreadsVal, ThreadLimitVal) 5042 : NumThreadsVal; 5043 5044 // Set default value passed to the runtime if either teams or a target 5045 // parallel type directive is found but no clause is specified. 5046 if (!ThreadLimitVal) 5047 ThreadLimitVal = DefaultThreadLimitVal; 5048 5049 return ThreadLimitVal; 5050 } 5051 5052 // If the current target region has a teams region enclosed, we need to get 5053 // the thread limit to pass to the runtime function call. This is done 5054 // by generating the expression in a inlined region. This is required because 5055 // the expression is captured in the enclosing target environment when the 5056 // teams directive is not combined with target. 
5057 5058 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 5059 5060 // FIXME: Accommodate other combined directives with teams when they become 5061 // available. 5062 if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>( 5063 ignoreCompoundStmts(CS.getCapturedStmt()))) { 5064 if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { 5065 CGOpenMPInnerExprInfo CGInfo(CGF, CS); 5066 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 5067 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); 5068 return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, 5069 /*IsSigned=*/true); 5070 } 5071 5072 // If we have an enclosed teams directive but no thread_limit clause we use 5073 // the default value 0. 5074 return CGF.Builder.getInt32(0); 5075 } 5076 5077 // No teams associated with the directive. 5078 return nullptr; 5079 } 5080 5081 namespace { 5082 // \brief Utility to handle information from clauses associated with a given 5083 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 5084 // It provides a convenient interface to obtain the information and generate 5085 // code for that information. 5086 class MappableExprsHandler { 5087 public: 5088 /// \brief Values for bit flags used to specify the mapping type for 5089 /// offloading. 5090 enum OpenMPOffloadMappingFlags { 5091 /// \brief Allocate memory on the device and move data from host to device. 5092 OMP_MAP_TO = 0x01, 5093 /// \brief Allocate memory on the device and move data from device to host. 5094 OMP_MAP_FROM = 0x02, 5095 /// \brief Always perform the requested mapping action on the element, even 5096 /// if it was already mapped before. 5097 OMP_MAP_ALWAYS = 0x04, 5098 /// \brief Delete the element from the device environment, ignoring the 5099 /// current reference count associated with the element. 
5100 OMP_MAP_DELETE = 0x08, 5101 /// \brief The element being mapped is a pointer, therefore the pointee 5102 /// should be mapped as well. 5103 OMP_MAP_IS_PTR = 0x10, 5104 /// \brief This flags signals that an argument is the first one relating to 5105 /// a map/private clause expression. For some cases a single 5106 /// map/privatization results in multiple arguments passed to the runtime 5107 /// library. 5108 OMP_MAP_FIRST_REF = 0x20, 5109 /// \brief Signal that the runtime library has to return the device pointer 5110 /// in the current position for the data being mapped. 5111 OMP_MAP_RETURN_PTR = 0x40, 5112 /// \brief This flag signals that the reference being passed is a pointer to 5113 /// private data. 5114 OMP_MAP_PRIVATE_PTR = 0x80, 5115 /// \brief Pass the element to the device by value. 5116 OMP_MAP_PRIVATE_VAL = 0x100, 5117 }; 5118 5119 /// Class that associates information with a base pointer to be passed to the 5120 /// runtime library. 5121 class BasePointerInfo { 5122 /// The base pointer. 5123 llvm::Value *Ptr = nullptr; 5124 /// The base declaration that refers to this device pointer, or null if 5125 /// there is none. 5126 const ValueDecl *DevPtrDecl = nullptr; 5127 5128 public: 5129 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 5130 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 5131 llvm::Value *operator*() const { return Ptr; } 5132 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 5133 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 5134 }; 5135 5136 typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy; 5137 typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy; 5138 typedef SmallVector<unsigned, 16> MapFlagsArrayTy; 5139 5140 private: 5141 /// \brief Directive from where the map clauses were extracted. 5142 const OMPExecutableDirective &CurDir; 5143 5144 /// \brief Function the directive is being generated for. 
5145 CodeGenFunction &CGF; 5146 5147 /// \brief Set of all first private variables in the current directive. 5148 llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls; 5149 5150 /// Map between device pointer declarations and their expression components. 5151 /// The key value for declarations in 'this' is null. 5152 llvm::DenseMap< 5153 const ValueDecl *, 5154 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 5155 DevPointersMap; 5156 5157 llvm::Value *getExprTypeSize(const Expr *E) const { 5158 auto ExprTy = E->getType().getCanonicalType(); 5159 5160 // Reference types are ignored for mapping purposes. 5161 if (auto *RefTy = ExprTy->getAs<ReferenceType>()) 5162 ExprTy = RefTy->getPointeeType().getCanonicalType(); 5163 5164 // Given that an array section is considered a built-in type, we need to 5165 // do the calculation based on the length of the section instead of relying 5166 // on CGF.getTypeSize(E->getType()). 5167 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 5168 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 5169 OAE->getBase()->IgnoreParenImpCasts()) 5170 .getCanonicalType(); 5171 5172 // If there is no length associated with the expression, that means we 5173 // are using the whole length of the base. 5174 if (!OAE->getLength() && OAE->getColonLoc().isValid()) 5175 return CGF.getTypeSize(BaseTy); 5176 5177 llvm::Value *ElemSize; 5178 if (auto *PTy = BaseTy->getAs<PointerType>()) 5179 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 5180 else { 5181 auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 5182 assert(ATy && "Expecting array type if not a pointer type."); 5183 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 5184 } 5185 5186 // If we don't have a length at this point, that is because we have an 5187 // array section with a single element. 
5188 if (!OAE->getLength()) 5189 return ElemSize; 5190 5191 auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); 5192 LengthVal = 5193 CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); 5194 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 5195 } 5196 return CGF.getTypeSize(ExprTy); 5197 } 5198 5199 /// \brief Return the corresponding bits for a given map clause modifier. Add 5200 /// a flag marking the map as a pointer if requested. Add a flag marking the 5201 /// map as the first one of a series of maps that relate to the same map 5202 /// expression. 5203 unsigned getMapTypeBits(OpenMPMapClauseKind MapType, 5204 OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag, 5205 bool AddIsFirstFlag) const { 5206 unsigned Bits = 0u; 5207 switch (MapType) { 5208 case OMPC_MAP_alloc: 5209 case OMPC_MAP_release: 5210 // alloc and release is the default behavior in the runtime library, i.e. 5211 // if we don't pass any bits alloc/release that is what the runtime is 5212 // going to do. Therefore, we don't need to signal anything for these two 5213 // type modifiers. 5214 break; 5215 case OMPC_MAP_to: 5216 Bits = OMP_MAP_TO; 5217 break; 5218 case OMPC_MAP_from: 5219 Bits = OMP_MAP_FROM; 5220 break; 5221 case OMPC_MAP_tofrom: 5222 Bits = OMP_MAP_TO | OMP_MAP_FROM; 5223 break; 5224 case OMPC_MAP_delete: 5225 Bits = OMP_MAP_DELETE; 5226 break; 5227 default: 5228 llvm_unreachable("Unexpected map type!"); 5229 break; 5230 } 5231 if (AddPtrFlag) 5232 Bits |= OMP_MAP_IS_PTR; 5233 if (AddIsFirstFlag) 5234 Bits |= OMP_MAP_FIRST_REF; 5235 if (MapTypeModifier == OMPC_MAP_always) 5236 Bits |= OMP_MAP_ALWAYS; 5237 return Bits; 5238 } 5239 5240 /// \brief Return true if the provided expression is a final array section. A 5241 /// final array section, is one whose length can't be proved to be one. 
5242 bool isFinalArraySectionExpression(const Expr *E) const { 5243 auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 5244 5245 // It is not an array section and therefore not a unity-size one. 5246 if (!OASE) 5247 return false; 5248 5249 // An array section with no colon always refer to a single element. 5250 if (OASE->getColonLoc().isInvalid()) 5251 return false; 5252 5253 auto *Length = OASE->getLength(); 5254 5255 // If we don't have a length we have to check if the array has size 1 5256 // for this dimension. Also, we should always expect a length if the 5257 // base type is pointer. 5258 if (!Length) { 5259 auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 5260 OASE->getBase()->IgnoreParenImpCasts()) 5261 .getCanonicalType(); 5262 if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 5263 return ATy->getSize().getSExtValue() != 1; 5264 // If we don't have a constant dimension length, we have to consider 5265 // the current section as having any size, so it is not necessarily 5266 // unitary. If it happen to be unity size, that's user fault. 5267 return true; 5268 } 5269 5270 // Check if the length evaluates to 1. 5271 llvm::APSInt ConstLength; 5272 if (!Length->EvaluateAsInt(ConstLength, CGF.getContext())) 5273 return true; // Can have more that size 1. 5274 5275 return ConstLength.getSExtValue() != 1; 5276 } 5277 5278 /// \brief Generate the base pointers, section pointers, sizes and map type 5279 /// bits for the provided map type, map modifier, and expression components. 5280 /// \a IsFirstComponent should be set to true if the provided set of 5281 /// components is the first associated with a capture. 
5282 void generateInfoForComponentList( 5283 OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, 5284 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 5285 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 5286 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 5287 bool IsFirstComponentList) const { 5288 5289 // The following summarizes what has to be generated for each map and the 5290 // types bellow. The generated information is expressed in this order: 5291 // base pointer, section pointer, size, flags 5292 // (to add to the ones that come from the map type and modifier). 5293 // 5294 // double d; 5295 // int i[100]; 5296 // float *p; 5297 // 5298 // struct S1 { 5299 // int i; 5300 // float f[50]; 5301 // } 5302 // struct S2 { 5303 // int i; 5304 // float f[50]; 5305 // S1 s; 5306 // double *p; 5307 // struct S2 *ps; 5308 // } 5309 // S2 s; 5310 // S2 *ps; 5311 // 5312 // map(d) 5313 // &d, &d, sizeof(double), noflags 5314 // 5315 // map(i) 5316 // &i, &i, 100*sizeof(int), noflags 5317 // 5318 // map(i[1:23]) 5319 // &i(=&i[0]), &i[1], 23*sizeof(int), noflags 5320 // 5321 // map(p) 5322 // &p, &p, sizeof(float*), noflags 5323 // 5324 // map(p[1:24]) 5325 // p, &p[1], 24*sizeof(float), noflags 5326 // 5327 // map(s) 5328 // &s, &s, sizeof(S2), noflags 5329 // 5330 // map(s.i) 5331 // &s, &(s.i), sizeof(int), noflags 5332 // 5333 // map(s.s.f) 5334 // &s, &(s.i.f), 50*sizeof(int), noflags 5335 // 5336 // map(s.p) 5337 // &s, &(s.p), sizeof(double*), noflags 5338 // 5339 // map(s.p[:22], s.a s.b) 5340 // &s, &(s.p), sizeof(double*), noflags 5341 // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag 5342 // 5343 // map(s.ps) 5344 // &s, &(s.ps), sizeof(S2*), noflags 5345 // 5346 // map(s.ps->s.i) 5347 // &s, &(s.ps), sizeof(S2*), noflags 5348 // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag 5349 // 5350 // map(s.ps->ps) 5351 // &s, &(s.ps), sizeof(S2*), noflags 5352 // &(s.ps), &(s.ps->ps), 
sizeof(S2*), ptr_flag + extra_flag 5353 // 5354 // map(s.ps->ps->ps) 5355 // &s, &(s.ps), sizeof(S2*), noflags 5356 // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag 5357 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 5358 // 5359 // map(s.ps->ps->s.f[:22]) 5360 // &s, &(s.ps), sizeof(S2*), noflags 5361 // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag 5362 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag 5363 // 5364 // map(ps) 5365 // &ps, &ps, sizeof(S2*), noflags 5366 // 5367 // map(ps->i) 5368 // ps, &(ps->i), sizeof(int), noflags 5369 // 5370 // map(ps->s.f) 5371 // ps, &(ps->s.f[0]), 50*sizeof(float), noflags 5372 // 5373 // map(ps->p) 5374 // ps, &(ps->p), sizeof(double*), noflags 5375 // 5376 // map(ps->p[:22]) 5377 // ps, &(ps->p), sizeof(double*), noflags 5378 // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag 5379 // 5380 // map(ps->ps) 5381 // ps, &(ps->ps), sizeof(S2*), noflags 5382 // 5383 // map(ps->ps->s.i) 5384 // ps, &(ps->ps), sizeof(S2*), noflags 5385 // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag 5386 // 5387 // map(ps->ps->ps) 5388 // ps, &(ps->ps), sizeof(S2*), noflags 5389 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 5390 // 5391 // map(ps->ps->ps->ps) 5392 // ps, &(ps->ps), sizeof(S2*), noflags 5393 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 5394 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 5395 // 5396 // map(ps->ps->ps->s.f[:22]) 5397 // ps, &(ps->ps), sizeof(S2*), noflags 5398 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 5399 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + 5400 // extra_flag 5401 5402 // Track if the map information being generated is the first for a capture. 5403 bool IsCaptureFirstInfo = IsFirstComponentList; 5404 5405 // Scan the components from the base to the complete expression. 
5406 auto CI = Components.rbegin(); 5407 auto CE = Components.rend(); 5408 auto I = CI; 5409 5410 // Track if the map information being generated is the first for a list of 5411 // components. 5412 bool IsExpressionFirstInfo = true; 5413 llvm::Value *BP = nullptr; 5414 5415 if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) { 5416 // The base is the 'this' pointer. The content of the pointer is going 5417 // to be the base of the field being mapped. 5418 BP = CGF.EmitScalarExpr(ME->getBase()); 5419 } else { 5420 // The base is the reference to the variable. 5421 // BP = &Var. 5422 BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression())) 5423 .getPointer(); 5424 5425 // If the variable is a pointer and is being dereferenced (i.e. is not 5426 // the last component), the base has to be the pointer itself, not its 5427 // reference. References are ignored for mapping purposes. 5428 QualType Ty = 5429 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 5430 if (Ty->isAnyPointerType() && std::next(I) != CE) { 5431 auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty); 5432 BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(), 5433 Ty->castAs<PointerType>()) 5434 .getPointer(); 5435 5436 // We do not need to generate individual map information for the 5437 // pointer, it can be associated with the combined storage. 5438 ++I; 5439 } 5440 } 5441 5442 for (; I != CE; ++I) { 5443 auto Next = std::next(I); 5444 5445 // We need to generate the addresses and sizes if this is the last 5446 // component, if the component is a pointer or if it is an array section 5447 // whose length can't be proved to be one. If this is a pointer, it 5448 // becomes the base address for the following components. 5449 5450 // A final array section, is one whose length can't be proved to be one. 5451 bool IsFinalArraySection = 5452 isFinalArraySectionExpression(I->getAssociatedExpression()); 5453 5454 // Get information on whether the element is a pointer. 
Have to do a 5455 // special treatment for array sections given that they are built-in 5456 // types. 5457 const auto *OASE = 5458 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 5459 bool IsPointer = 5460 (OASE && 5461 OMPArraySectionExpr::getBaseOriginalType(OASE) 5462 .getCanonicalType() 5463 ->isAnyPointerType()) || 5464 I->getAssociatedExpression()->getType()->isAnyPointerType(); 5465 5466 if (Next == CE || IsPointer || IsFinalArraySection) { 5467 5468 // If this is not the last component, we expect the pointer to be 5469 // associated with an array expression or member expression. 5470 assert((Next == CE || 5471 isa<MemberExpr>(Next->getAssociatedExpression()) || 5472 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 5473 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && 5474 "Unexpected expression"); 5475 5476 auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer(); 5477 auto *Size = getExprTypeSize(I->getAssociatedExpression()); 5478 5479 // If we have a member expression and the current component is a 5480 // reference, we have to map the reference too. Whenever we have a 5481 // reference, the section that reference refers to is going to be a 5482 // load instruction from the storage assigned to the reference. 5483 if (isa<MemberExpr>(I->getAssociatedExpression()) && 5484 I->getAssociatedDeclaration()->getType()->isReferenceType()) { 5485 auto *LI = cast<llvm::LoadInst>(LB); 5486 auto *RefAddr = LI->getPointerOperand(); 5487 5488 BasePointers.push_back(BP); 5489 Pointers.push_back(RefAddr); 5490 Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); 5491 Types.push_back(getMapTypeBits( 5492 /*MapType*/ OMPC_MAP_alloc, /*MapTypeModifier=*/OMPC_MAP_unknown, 5493 !IsExpressionFirstInfo, IsCaptureFirstInfo)); 5494 IsExpressionFirstInfo = false; 5495 IsCaptureFirstInfo = false; 5496 // The reference will be the next base address. 
5497 BP = RefAddr; 5498 } 5499 5500 BasePointers.push_back(BP); 5501 Pointers.push_back(LB); 5502 Sizes.push_back(Size); 5503 5504 // We need to add a pointer flag for each map that comes from the 5505 // same expression except for the first one. We also need to signal 5506 // this map is the first one that relates with the current capture 5507 // (there is a set of entries for each capture). 5508 Types.push_back(getMapTypeBits(MapType, MapTypeModifier, 5509 !IsExpressionFirstInfo, 5510 IsCaptureFirstInfo)); 5511 5512 // If we have a final array section, we are done with this expression. 5513 if (IsFinalArraySection) 5514 break; 5515 5516 // The pointer becomes the base for the next element. 5517 if (Next != CE) 5518 BP = LB; 5519 5520 IsExpressionFirstInfo = false; 5521 IsCaptureFirstInfo = false; 5522 continue; 5523 } 5524 } 5525 } 5526 5527 /// \brief Return the adjusted map modifiers if the declaration a capture 5528 /// refers to appears in a first-private clause. This is expected to be used 5529 /// only with directives that start with 'target'. 5530 unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap, 5531 unsigned CurrentModifiers) { 5532 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 5533 5534 // A first private variable captured by reference will use only the 5535 // 'private ptr' and 'map to' flag. Return the right flags if the captured 5536 // declaration is known as first-private in this handler. 5537 if (FirstPrivateDecls.count(Cap.getCapturedVar())) 5538 return MappableExprsHandler::OMP_MAP_PRIVATE_PTR | 5539 MappableExprsHandler::OMP_MAP_TO; 5540 5541 // We didn't modify anything. 5542 return CurrentModifiers; 5543 } 5544 5545 public: 5546 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 5547 : CurDir(Dir), CGF(CGF) { 5548 // Extract firstprivate clause information. 
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.insert(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[L.first].push_back(L.second);
  }

  /// \brief Generate all the base pointers, section pointers, sizes and map
  /// types for the mappable expressions found in the 'map', 'to', 'from' and
  /// 'use_device_ptr' clauses of the current directive.
  ///
  /// All four output arrays are cleared first and are filled in lock-step (one
  /// entry per emitted map item). For each item that relates to a
  /// 'use_device_ptr' entry, the relevant declaration is recorded on the
  /// matching base-pointer entry and OMP_MAP_RETURN_PTR is added to its flags.
  ///
  /// \param BasePointers [out] Base pointer of every emitted map item.
  /// \param Pointers [out] Section pointer of every emitted map item.
  /// \param Sizes [out] Size of every emitted map item.
  /// \param Types [out] Map-type flags of every emitted map item.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    BasePointers.clear();
    Pointers.clear();
    Sizes.clear();
    Types.clear();

    // Per-component-list record: the list itself, the map kind/modifier it was
    // given, and whether (and how) a device pointer has to be returned for it.
    struct MapInfo {
      /// Kind that defines how a device pointer has to be returned.
      enum ReturnPointerKind {
        // Don't have to return any pointer.
        RPK_None,
        // Pointer is the base of the declaration.
        RPK_Base,
        // Pointer is a member of the base declaration - 'this'
        RPK_Member,
        // Pointer is a reference and a member of the base declaration - 'this'
        RPK_MemberReference,
      };
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      OpenMPMapClauseKind MapTypeModifier;
      ReturnPointerKind ReturnDevicePointer;

      MapInfo()
          : MapType(OMPC_MAP_unknown), MapTypeModifier(OMPC_MAP_unknown),
            ReturnDevicePointer(RPK_None) {}
      MapInfo(
          OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
          OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
          ReturnPointerKind ReturnDevicePointer)
          : Components(Components), MapType(MapType),
            MapTypeModifier(MapTypeModifier),
            ReturnDevicePointer(ReturnDevicePointer) {}
    };

    // We have to process the component lists that relate to the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map keyed by the
    // canonical declaration (nullptr when no declaration is given).
    llvm::DenseMap<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. The key is canonicalized here so that every later lookup can
    // use the canonical declaration directly.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
        MapInfo::ReturnPointerKind ReturnDevicePointer) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].push_back({L, MapType, MapModifier, ReturnDevicePointer});
    };

    // Collect the component lists of the 'map', 'to' and 'from' clauses. The
    // 'to'/'from' clauses carry no modifier, hence OMPC_MAP_unknown.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
      for (auto L : C->component_lists())
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
                MapInfo::RPK_None);
    for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
      for (auto L : C->component_lists())
        InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
                MapInfo::RPK_None);
    for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
      for (auto L : C->component_lists())
        InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
                MapInfo::RPK_None);

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (auto *C : this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>())
      for (auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        // Both the declaration and the expression are taken from the last
        // entry of the component list.
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        auto *IE = L.second.back().getAssociatedExpression();
        // If that component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = isa<MemberExpr>(IE)
                                          ? (VD->getType()->isReferenceType()
                                                 ? MapInfo::RPK_MemberReference
                                                 : MapInfo::RPK_Member)
                                          : MapInfo::RPK_Base;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section. These entries are appended to the output arrays
        // right away, i.e. before any entry produced from Info below.
        // FIXME: MSVC 2013 seems to require this-> to find member CGF.
        llvm::Value *Ptr =
            this->CGF
                .EmitLoadOfLValue(this->CGF.EmitLValue(IE), SourceLocation())
                .getScalarVal();
        BasePointers.push_back({Ptr, VD});
        Pointers.push_back(Ptr);
        Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
        Types.push_back(OMP_MAP_RETURN_PTR | OMP_MAP_FIRST_REF);
      }

    for (auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;
      for (MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index, i.e. the index of the first
        // entry the call below is going to append.
        unsigned CurrentBasePointersIdx = BasePointers.size();
        // FIXME: MSVC 2013 seems to require this-> to find the member method.
        this->generateInfoForComponentList(L.MapType, L.MapTypeModifier,
                                           L.Components, BasePointers, Pointers,
                                           Sizes, Types, IsFirstComponentList);

        // If this entry relates to a device pointer, set the relevant
        // declaration and add the 'return pointer' flag. Only the first
        // component list of a declaration is considered.
        if (IsFirstComponentList &&
            L.ReturnDevicePointer != MapInfo::RPK_None) {
          // If the pointer is not the base of the map, we need to skip the
          // base. If it is a reference in a member field, we also need to skip
          // the map of the reference.
          if (L.ReturnDevicePointer != MapInfo::RPK_Base) {
            ++CurrentBasePointersIdx;
            if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference)
              ++CurrentBasePointersIdx;
          }
          assert(BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          auto *RelevantVD = L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PTR;
        }
        IsFirstComponentList = false;
      }
    }
  }

  /// \brief Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes,
                              MapFlagsArrayTy &Types) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    BasePointers.clear();
    Pointers.clear();
    Sizes.clear();
    Types.clear();

    // We need to know when we are generating information for the first
    // component associated with a capture, because the mapping flags depend
    // on it.
    bool IsFirstComponentList = true;

    const ValueDecl *VD =
        Cap->capturesThis()
            ? nullptr
            : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl());

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value.
If it is a reference to a declaration, we just 5748 // pass its value, otherwise, if it is a member expression, we need to map 5749 // 'to' the field. 5750 if (!VD) { 5751 auto It = DevPointersMap.find(VD); 5752 if (It != DevPointersMap.end()) { 5753 for (auto L : It->second) { 5754 generateInfoForComponentList( 5755 /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L, 5756 BasePointers, Pointers, Sizes, Types, IsFirstComponentList); 5757 IsFirstComponentList = false; 5758 } 5759 return; 5760 } 5761 } else if (DevPointersMap.count(VD)) { 5762 BasePointers.push_back({Arg, VD}); 5763 Pointers.push_back(Arg); 5764 Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); 5765 Types.push_back(OMP_MAP_PRIVATE_VAL | OMP_MAP_FIRST_REF); 5766 return; 5767 } 5768 5769 // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 5770 for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) 5771 for (auto L : C->decl_component_lists(VD)) { 5772 assert(L.first == VD && 5773 "We got information for the wrong declaration??"); 5774 assert(!L.second.empty() && 5775 "Not expecting declaration with no component lists."); 5776 generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(), 5777 L.second, BasePointers, Pointers, Sizes, 5778 Types, IsFirstComponentList); 5779 IsFirstComponentList = false; 5780 } 5781 5782 return; 5783 } 5784 5785 /// \brief Generate the default map information for a given capture \a CI, 5786 /// record field declaration \a RI and captured value \a CV. 5787 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 5788 const FieldDecl &RI, llvm::Value *CV, 5789 MapBaseValuesArrayTy &CurBasePointers, 5790 MapValuesArrayTy &CurPointers, 5791 MapValuesArrayTy &CurSizes, 5792 MapFlagsArrayTy &CurMapTypes) { 5793 5794 // Do the default mapping. 
5795 if (CI.capturesThis()) { 5796 CurBasePointers.push_back(CV); 5797 CurPointers.push_back(CV); 5798 const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 5799 CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType())); 5800 // Default map type. 5801 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 5802 } else if (CI.capturesVariableByCopy()) { 5803 CurBasePointers.push_back(CV); 5804 CurPointers.push_back(CV); 5805 if (!RI.getType()->isAnyPointerType()) { 5806 // We have to signal to the runtime captures passed by value that are 5807 // not pointers. 5808 CurMapTypes.push_back(OMP_MAP_PRIVATE_VAL); 5809 CurSizes.push_back(CGF.getTypeSize(RI.getType())); 5810 } else { 5811 // Pointers are implicitly mapped with a zero size and no flags 5812 // (other than first map that is added for all implicit maps). 5813 CurMapTypes.push_back(0u); 5814 CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy)); 5815 } 5816 } else { 5817 assert(CI.capturesVariable() && "Expected captured reference."); 5818 CurBasePointers.push_back(CV); 5819 CurPointers.push_back(CV); 5820 5821 const ReferenceType *PtrTy = 5822 cast<ReferenceType>(RI.getType().getTypePtr()); 5823 QualType ElementType = PtrTy->getPointeeType(); 5824 CurSizes.push_back(CGF.getTypeSize(ElementType)); 5825 // The default map type for a scalar/complex type is 'to' because by 5826 // default the value doesn't have to be retrieved. For an aggregate 5827 // type, the default is 'tofrom'. 5828 CurMapTypes.push_back(ElementType->isAggregateType() 5829 ? (OMP_MAP_TO | OMP_MAP_FROM) 5830 : OMP_MAP_TO); 5831 5832 // If we have a capture by reference we may need to add the private 5833 // pointer flag if the base declaration shows in some first-private 5834 // clause. 5835 CurMapTypes.back() = 5836 adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back()); 5837 } 5838 // Every default map produces a single argument, so, it is always the 5839 // first one. 
5840 CurMapTypes.back() |= OMP_MAP_FIRST_REF; 5841 } 5842 }; 5843 5844 enum OpenMPOffloadingReservedDeviceIDs { 5845 /// \brief Device ID if the device was not defined, runtime should get it 5846 /// from environment variables in the spec. 5847 OMP_DEVICEID_UNDEF = -1, 5848 }; 5849 } // anonymous namespace 5850 5851 /// \brief Emit the arrays used to pass the captures and map information to the 5852 /// offloading runtime library. If there is no map or capture information, 5853 /// return nullptr by reference. 5854 static void 5855 emitOffloadingArrays(CodeGenFunction &CGF, 5856 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 5857 MappableExprsHandler::MapValuesArrayTy &Pointers, 5858 MappableExprsHandler::MapValuesArrayTy &Sizes, 5859 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 5860 CGOpenMPRuntime::TargetDataInfo &Info) { 5861 auto &CGM = CGF.CGM; 5862 auto &Ctx = CGF.getContext(); 5863 5864 // Reset the array information. 5865 Info.clearArrayInfo(); 5866 Info.NumberOfPtrs = BasePointers.size(); 5867 5868 if (Info.NumberOfPtrs) { 5869 // Detect if we have any capture size requiring runtime evaluation of the 5870 // size so that a constant array could be eventually used. 
5871 bool hasRuntimeEvaluationCaptureSize = false; 5872 for (auto *S : Sizes) 5873 if (!isa<llvm::Constant>(S)) { 5874 hasRuntimeEvaluationCaptureSize = true; 5875 break; 5876 } 5877 5878 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 5879 QualType PointerArrayType = 5880 Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, 5881 /*IndexTypeQuals=*/0); 5882 5883 Info.BasePointersArray = 5884 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 5885 Info.PointersArray = 5886 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 5887 5888 // If we don't have any VLA types or other types that require runtime 5889 // evaluation, we can use a constant array for the map sizes, otherwise we 5890 // need to fill up the arrays as we do for the pointers. 5891 if (hasRuntimeEvaluationCaptureSize) { 5892 QualType SizeArrayType = Ctx.getConstantArrayType( 5893 Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, 5894 /*IndexTypeQuals=*/0); 5895 Info.SizesArray = 5896 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 5897 } else { 5898 // We expect all the sizes to be constant, so we collect them to create 5899 // a constant array. 5900 SmallVector<llvm::Constant *, 16> ConstSizes; 5901 for (auto S : Sizes) 5902 ConstSizes.push_back(cast<llvm::Constant>(S)); 5903 5904 auto *SizesArrayInit = llvm::ConstantArray::get( 5905 llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); 5906 auto *SizesArrayGbl = new llvm::GlobalVariable( 5907 CGM.getModule(), SizesArrayInit->getType(), 5908 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 5909 SizesArrayInit, ".offload_sizes"); 5910 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 5911 Info.SizesArray = SizesArrayGbl; 5912 } 5913 5914 // The map types are always constant so we don't need to generate code to 5915 // fill arrays. Instead, we create an array constant. 
5916 llvm::Constant *MapTypesArrayInit = 5917 llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); 5918 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 5919 CGM.getModule(), MapTypesArrayInit->getType(), 5920 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 5921 MapTypesArrayInit, ".offload_maptypes"); 5922 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 5923 Info.MapTypesArray = MapTypesArrayGbl; 5924 5925 for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) { 5926 llvm::Value *BPVal = *BasePointers[i]; 5927 if (BPVal->getType()->isPointerTy()) 5928 BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy); 5929 else { 5930 assert(BPVal->getType()->isIntegerTy() && 5931 "If not a pointer, the value type must be an integer."); 5932 BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy); 5933 } 5934 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 5935 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 5936 Info.BasePointersArray, 0, i); 5937 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 5938 CGF.Builder.CreateStore(BPVal, BPAddr); 5939 5940 if (Info.requiresDevicePointerInfo()) 5941 if (auto *DevVD = BasePointers[i].getDevicePtrDecl()) 5942 Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr)); 5943 5944 llvm::Value *PVal = Pointers[i]; 5945 if (PVal->getType()->isPointerTy()) 5946 PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy); 5947 else { 5948 assert(PVal->getType()->isIntegerTy() && 5949 "If not a pointer, the value type must be an integer."); 5950 PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy); 5951 } 5952 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 5953 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 5954 Info.PointersArray, 0, i); 5955 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 5956 CGF.Builder.CreateStore(PVal, PAddr); 5957 5958 if (hasRuntimeEvaluationCaptureSize) { 5959 llvm::Value *S = 
CGF.Builder.CreateConstInBoundsGEP2_32( 5960 llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), 5961 Info.SizesArray, 5962 /*Idx0=*/0, 5963 /*Idx1=*/i); 5964 Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); 5965 CGF.Builder.CreateStore( 5966 CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true), 5967 SAddr); 5968 } 5969 } 5970 } 5971 } 5972 /// \brief Emit the arguments to be passed to the runtime library based on the 5973 /// arrays of pointers, sizes and map types. 5974 static void emitOffloadingArraysArgument( 5975 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 5976 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 5977 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 5978 auto &CGM = CGF.CGM; 5979 if (Info.NumberOfPtrs) { 5980 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5981 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 5982 Info.BasePointersArray, 5983 /*Idx0=*/0, /*Idx1=*/0); 5984 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5985 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 5986 Info.PointersArray, 5987 /*Idx0=*/0, 5988 /*Idx1=*/0); 5989 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5990 llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray, 5991 /*Idx0=*/0, /*Idx1=*/0); 5992 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5993 llvm::ArrayType::get(CGM.Int32Ty, Info.NumberOfPtrs), 5994 Info.MapTypesArray, 5995 /*Idx0=*/0, 5996 /*Idx1=*/0); 5997 } else { 5998 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 5999 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 6000 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); 6001 MapTypesArrayArg = 6002 llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()); 6003 } 6004 } 6005 6006 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, 6007 const OMPExecutableDirective &D, 6008 
llvm::Value *OutlinedFn, 6009 llvm::Value *OutlinedFnID, 6010 const Expr *IfCond, const Expr *Device, 6011 ArrayRef<llvm::Value *> CapturedVars) { 6012 if (!CGF.HaveInsertPoint()) 6013 return; 6014 6015 assert(OutlinedFn && "Invalid outlined function!"); 6016 6017 auto &Ctx = CGF.getContext(); 6018 6019 // Fill up the arrays with all the captured variables. 6020 MappableExprsHandler::MapValuesArrayTy KernelArgs; 6021 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 6022 MappableExprsHandler::MapValuesArrayTy Pointers; 6023 MappableExprsHandler::MapValuesArrayTy Sizes; 6024 MappableExprsHandler::MapFlagsArrayTy MapTypes; 6025 6026 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 6027 MappableExprsHandler::MapValuesArrayTy CurPointers; 6028 MappableExprsHandler::MapValuesArrayTy CurSizes; 6029 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 6030 6031 // Get mappable expression information. 6032 MappableExprsHandler MEHandler(D, CGF); 6033 6034 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 6035 auto RI = CS.getCapturedRecordDecl()->field_begin(); 6036 auto CV = CapturedVars.begin(); 6037 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 6038 CE = CS.capture_end(); 6039 CI != CE; ++CI, ++RI, ++CV) { 6040 StringRef Name; 6041 QualType Ty; 6042 6043 CurBasePointers.clear(); 6044 CurPointers.clear(); 6045 CurSizes.clear(); 6046 CurMapTypes.clear(); 6047 6048 // VLA sizes are passed to the outlined region by copy and do not have map 6049 // information associated. 6050 if (CI->capturesVariableArrayType()) { 6051 CurBasePointers.push_back(*CV); 6052 CurPointers.push_back(*CV); 6053 CurSizes.push_back(CGF.getTypeSize(RI->getType())); 6054 // Copy to the device as an argument. No need to retrieve it. 
6055 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL | 6056 MappableExprsHandler::OMP_MAP_FIRST_REF); 6057 } else { 6058 // If we have any information in the map clause, we use it, otherwise we 6059 // just do a default mapping. 6060 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, 6061 CurSizes, CurMapTypes); 6062 if (CurBasePointers.empty()) 6063 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, 6064 CurPointers, CurSizes, CurMapTypes); 6065 } 6066 // We expect to have at least an element of information for this capture. 6067 assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!"); 6068 assert(CurBasePointers.size() == CurPointers.size() && 6069 CurBasePointers.size() == CurSizes.size() && 6070 CurBasePointers.size() == CurMapTypes.size() && 6071 "Inconsistent map information sizes!"); 6072 6073 // The kernel args are always the first elements of the base pointers 6074 // associated with a capture. 6075 KernelArgs.push_back(*CurBasePointers.front()); 6076 // We need to append the results of this capture to what we already have. 6077 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 6078 Pointers.append(CurPointers.begin(), CurPointers.end()); 6079 Sizes.append(CurSizes.begin(), CurSizes.end()); 6080 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); 6081 } 6082 6083 // Keep track on whether the host function has to be executed. 6084 auto OffloadErrorQType = 6085 Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true); 6086 auto OffloadError = CGF.MakeAddrLValue( 6087 CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"), 6088 OffloadErrorQType); 6089 CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), 6090 OffloadError); 6091 6092 // Fill up the pointer arrays and transfer execution to the device. 
6093 auto &&ThenGen = [&BasePointers, &Pointers, &Sizes, &MapTypes, Device, 6094 OutlinedFnID, OffloadError, 6095 &D](CodeGenFunction &CGF, PrePostActionTy &) { 6096 auto &RT = CGF.CGM.getOpenMPRuntime(); 6097 // Emit the offloading arrays. 6098 TargetDataInfo Info; 6099 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 6100 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 6101 Info.PointersArray, Info.SizesArray, 6102 Info.MapTypesArray, Info); 6103 6104 // On top of the arrays that were filled up, the target offloading call 6105 // takes as arguments the device id as well as the host pointer. The host 6106 // pointer is used by the runtime library to identify the current target 6107 // region, so it only has to be unique and not necessarily point to 6108 // anything. It could be the pointer to the outlined function that 6109 // implements the target region, but we aren't using that so that the 6110 // compiler doesn't need to keep that, and could therefore inline the host 6111 // function if proven worthwhile during optimization. 6112 6113 // From this point on, we need to have an ID of the target region defined. 6114 assert(OutlinedFnID && "Invalid outlined function ID!"); 6115 6116 // Emit device ID if any. 6117 llvm::Value *DeviceID; 6118 if (Device) 6119 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 6120 CGF.Int32Ty, /*isSigned=*/true); 6121 else 6122 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 6123 6124 // Emit the number of elements in the offloading arrays. 6125 llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size()); 6126 6127 // Return value of the runtime offloading call. 6128 llvm::Value *Return; 6129 6130 auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D); 6131 auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D); 6132 6133 // The target region is an outlined function launched by the runtime 6134 // via calls __tgt_target() or __tgt_target_teams(). 
6135 // 6136 // __tgt_target() launches a target region with one team and one thread, 6137 // executing a serial region. This master thread may in turn launch 6138 // more threads within its team upon encountering a parallel region, 6139 // however, no additional teams can be launched on the device. 6140 // 6141 // __tgt_target_teams() launches a target region with one or more teams, 6142 // each with one or more threads. This call is required for target 6143 // constructs such as: 6144 // 'target teams' 6145 // 'target' / 'teams' 6146 // 'target teams distribute parallel for' 6147 // 'target parallel' 6148 // and so on. 6149 // 6150 // Note that on the host and CPU targets, the runtime implementation of 6151 // these calls simply call the outlined function without forking threads. 6152 // The outlined functions themselves have runtime calls to 6153 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 6154 // the compiler in emitTeamsCall() and emitParallelCall(). 6155 // 6156 // In contrast, on the NVPTX target, the implementation of 6157 // __tgt_target_teams() launches a GPU kernel with the requested number 6158 // of teams and threads so no additional calls to the runtime are required. 6159 if (NumTeams) { 6160 // If we have NumTeams defined this means that we have an enclosed teams 6161 // region. Therefore we also expect to have NumThreads defined. These two 6162 // values should be defined in the presence of a teams directive, 6163 // regardless of having any clauses associated. If the user is using teams 6164 // but no clauses, these two values will be the default that should be 6165 // passed to the runtime library - a 32-bit integer with the value zero. 
6166 assert(NumThreads && "Thread limit expression should be available along " 6167 "with number of teams."); 6168 llvm::Value *OffloadingArgs[] = { 6169 DeviceID, OutlinedFnID, 6170 PointerNum, Info.BasePointersArray, 6171 Info.PointersArray, Info.SizesArray, 6172 Info.MapTypesArray, NumTeams, 6173 NumThreads}; 6174 Return = CGF.EmitRuntimeCall( 6175 RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); 6176 } else { 6177 llvm::Value *OffloadingArgs[] = { 6178 DeviceID, OutlinedFnID, 6179 PointerNum, Info.BasePointersArray, 6180 Info.PointersArray, Info.SizesArray, 6181 Info.MapTypesArray}; 6182 Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target), 6183 OffloadingArgs); 6184 } 6185 6186 CGF.EmitStoreOfScalar(Return, OffloadError); 6187 }; 6188 6189 // Notify that the host version must be executed. 6190 auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) { 6191 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u), 6192 OffloadError); 6193 }; 6194 6195 // If we have a target function ID it means that we need to support 6196 // offloading, otherwise, just execute on the host. We need to execute on host 6197 // regardless of the conditional in the if clause if, e.g., the user do not 6198 // specify target triples. 6199 if (OutlinedFnID) { 6200 if (IfCond) 6201 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 6202 else { 6203 RegionCodeGenTy ThenRCG(ThenGen); 6204 ThenRCG(CGF); 6205 } 6206 } else { 6207 RegionCodeGenTy ElseRCG(ElseGen); 6208 ElseRCG(CGF); 6209 } 6210 6211 // Check the error code and execute the host version if required. 
6212 auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed"); 6213 auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont"); 6214 auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation()); 6215 auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal); 6216 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 6217 6218 CGF.EmitBlock(OffloadFailedBlock); 6219 CGF.Builder.CreateCall(OutlinedFn, KernelArgs); 6220 CGF.EmitBranch(OffloadContBlock); 6221 6222 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 6223 } 6224 6225 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 6226 StringRef ParentName) { 6227 if (!S) 6228 return; 6229 6230 // Codegen OMP target directives that offload compute to the device. 6231 bool requiresDeviceCodegen = 6232 isa<OMPExecutableDirective>(S) && 6233 isOpenMPTargetExecutionDirective( 6234 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 6235 6236 if (requiresDeviceCodegen) { 6237 auto &E = *cast<OMPExecutableDirective>(S); 6238 unsigned DeviceID; 6239 unsigned FileID; 6240 unsigned Line; 6241 getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID, 6242 FileID, Line); 6243 6244 // Is this a target region that should not be emitted as an entry point? If 6245 // so just signal we are done with this target region. 
6246 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 6247 ParentName, Line)) 6248 return; 6249 6250 switch (S->getStmtClass()) { 6251 case Stmt::OMPTargetDirectiveClass: 6252 CodeGenFunction::EmitOMPTargetDeviceFunction( 6253 CGM, ParentName, cast<OMPTargetDirective>(*S)); 6254 break; 6255 case Stmt::OMPTargetParallelDirectiveClass: 6256 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 6257 CGM, ParentName, cast<OMPTargetParallelDirective>(*S)); 6258 break; 6259 case Stmt::OMPTargetTeamsDirectiveClass: 6260 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 6261 CGM, ParentName, cast<OMPTargetTeamsDirective>(*S)); 6262 break; 6263 default: 6264 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 6265 } 6266 return; 6267 } 6268 6269 if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) { 6270 if (!E->hasAssociatedStmt()) 6271 return; 6272 6273 scanForTargetRegionsFunctions( 6274 cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(), 6275 ParentName); 6276 return; 6277 } 6278 6279 // If this is a lambda function, look into its body. 6280 if (auto *L = dyn_cast<LambdaExpr>(S)) 6281 S = L->getBody(); 6282 6283 // Keep looking for target regions recursively. 6284 for (auto *II : S->children()) 6285 scanForTargetRegionsFunctions(II, ParentName); 6286 } 6287 6288 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 6289 auto &FD = *cast<FunctionDecl>(GD.getDecl()); 6290 6291 // If emitting code for the host, we do not process FD here. Instead we do 6292 // the normal code generation. 6293 if (!CGM.getLangOpts().OpenMPIsDevice) 6294 return false; 6295 6296 // Try to detect target regions in the function. 6297 scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD)); 6298 6299 // We should not emit any function other that the ones created during the 6300 // scanning. Therefore, we signal that this function is completely dealt 6301 // with. 
6302 return true; 6303 } 6304 6305 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 6306 if (!CGM.getLangOpts().OpenMPIsDevice) 6307 return false; 6308 6309 // Check if there are Ctors/Dtors in this declaration and look for target 6310 // regions in it. We use the complete variant to produce the kernel name 6311 // mangling. 6312 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 6313 if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 6314 for (auto *Ctor : RD->ctors()) { 6315 StringRef ParentName = 6316 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 6317 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 6318 } 6319 auto *Dtor = RD->getDestructor(); 6320 if (Dtor) { 6321 StringRef ParentName = 6322 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 6323 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 6324 } 6325 } 6326 6327 // If we are in target mode we do not emit any global (declare target is not 6328 // implemented yet). Therefore we signal that GD was processed in this case. 6329 return true; 6330 } 6331 6332 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 6333 auto *VD = GD.getDecl(); 6334 if (isa<FunctionDecl>(VD)) 6335 return emitTargetFunctions(GD); 6336 6337 return emitTargetGlobalVariable(GD); 6338 } 6339 6340 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { 6341 // If we have offloading in the current module, we need to emit the entries 6342 // now and register the offloading descriptor. 6343 createOffloadEntriesAndInfoMetadata(); 6344 6345 // Create and register the offloading binary descriptors. This is the main 6346 // entity that captures all the information about offloading in the current 6347 // compilation unit. 
6348 return createOffloadingBinaryDescriptorRegistration(); 6349 } 6350 6351 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 6352 const OMPExecutableDirective &D, 6353 SourceLocation Loc, 6354 llvm::Value *OutlinedFn, 6355 ArrayRef<llvm::Value *> CapturedVars) { 6356 if (!CGF.HaveInsertPoint()) 6357 return; 6358 6359 auto *RTLoc = emitUpdateLocation(CGF, Loc); 6360 CodeGenFunction::RunCleanupsScope Scope(CGF); 6361 6362 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 6363 llvm::Value *Args[] = { 6364 RTLoc, 6365 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 6366 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 6367 llvm::SmallVector<llvm::Value *, 16> RealArgs; 6368 RealArgs.append(std::begin(Args), std::end(Args)); 6369 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 6370 6371 auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 6372 CGF.EmitRuntimeCall(RTLFn, RealArgs); 6373 } 6374 6375 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 6376 const Expr *NumTeams, 6377 const Expr *ThreadLimit, 6378 SourceLocation Loc) { 6379 if (!CGF.HaveInsertPoint()) 6380 return; 6381 6382 auto *RTLoc = emitUpdateLocation(CGF, Loc); 6383 6384 llvm::Value *NumTeamsVal = 6385 (NumTeams) 6386 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 6387 CGF.CGM.Int32Ty, /* isSigned = */ true) 6388 : CGF.Builder.getInt32(0); 6389 6390 llvm::Value *ThreadLimitVal = 6391 (ThreadLimit) 6392 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 6393 CGF.CGM.Int32Ty, /* isSigned = */ true) 6394 : CGF.Builder.getInt32(0); 6395 6396 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 6397 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 6398 ThreadLimitVal}; 6399 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 6400 PushNumTeamsArgs); 6401 } 6402 6403 void CGOpenMPRuntime::emitTargetDataCalls( 6404 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 6405 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 6406 if (!CGF.HaveInsertPoint()) 6407 return; 6408 6409 // Action used to replace the default codegen action and turn privatization 6410 // off. 6411 PrePostActionTy NoPrivAction; 6412 6413 // Generate the code for the opening of the data environment. Capture all the 6414 // arguments of the runtime call by reference because they are used in the 6415 // closing of the region. 6416 auto &&BeginThenGen = [&D, Device, &Info, &CodeGen](CodeGenFunction &CGF, 6417 PrePostActionTy &) { 6418 // Fill up the arrays with all the mapped variables. 6419 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 6420 MappableExprsHandler::MapValuesArrayTy Pointers; 6421 MappableExprsHandler::MapValuesArrayTy Sizes; 6422 MappableExprsHandler::MapFlagsArrayTy MapTypes; 6423 6424 // Get map clause information. 6425 MappableExprsHandler MCHandler(D, CGF); 6426 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 6427 6428 // Fill up the arrays and create the arguments. 
6429 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 6430 6431 llvm::Value *BasePointersArrayArg = nullptr; 6432 llvm::Value *PointersArrayArg = nullptr; 6433 llvm::Value *SizesArrayArg = nullptr; 6434 llvm::Value *MapTypesArrayArg = nullptr; 6435 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 6436 SizesArrayArg, MapTypesArrayArg, Info); 6437 6438 // Emit device ID if any. 6439 llvm::Value *DeviceID = nullptr; 6440 if (Device) 6441 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 6442 CGF.Int32Ty, /*isSigned=*/true); 6443 else 6444 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 6445 6446 // Emit the number of elements in the offloading arrays. 6447 auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 6448 6449 llvm::Value *OffloadingArgs[] = { 6450 DeviceID, PointerNum, BasePointersArrayArg, 6451 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 6452 auto &RT = CGF.CGM.getOpenMPRuntime(); 6453 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin), 6454 OffloadingArgs); 6455 6456 // If device pointer privatization is required, emit the body of the region 6457 // here. It will have to be duplicated: with and without privatization. 6458 if (!Info.CaptureDeviceAddrMap.empty()) 6459 CodeGen(CGF); 6460 }; 6461 6462 // Generate code for the closing of the data region. 6463 auto &&EndThenGen = [Device, &Info](CodeGenFunction &CGF, PrePostActionTy &) { 6464 assert(Info.isValid() && "Invalid data environment closing arguments."); 6465 6466 llvm::Value *BasePointersArrayArg = nullptr; 6467 llvm::Value *PointersArrayArg = nullptr; 6468 llvm::Value *SizesArrayArg = nullptr; 6469 llvm::Value *MapTypesArrayArg = nullptr; 6470 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 6471 SizesArrayArg, MapTypesArrayArg, Info); 6472 6473 // Emit device ID if any. 
6474 llvm::Value *DeviceID = nullptr; 6475 if (Device) 6476 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 6477 CGF.Int32Ty, /*isSigned=*/true); 6478 else 6479 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 6480 6481 // Emit the number of elements in the offloading arrays. 6482 auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 6483 6484 llvm::Value *OffloadingArgs[] = { 6485 DeviceID, PointerNum, BasePointersArrayArg, 6486 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 6487 auto &RT = CGF.CGM.getOpenMPRuntime(); 6488 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end), 6489 OffloadingArgs); 6490 }; 6491 6492 // If we need device pointer privatization, we need to emit the body of the 6493 // region with no privatization in the 'else' branch of the conditional. 6494 // Otherwise, we don't have to do anything. 6495 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 6496 PrePostActionTy &) { 6497 if (!Info.CaptureDeviceAddrMap.empty()) { 6498 CodeGen.setAction(NoPrivAction); 6499 CodeGen(CGF); 6500 } 6501 }; 6502 6503 // We don't have to do anything to close the region if the if clause evaluates 6504 // to false. 6505 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 6506 6507 if (IfCond) { 6508 emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 6509 } else { 6510 RegionCodeGenTy RCG(BeginThenGen); 6511 RCG(CGF); 6512 } 6513 6514 // If we don't require privatization of device pointers, we emit the body in 6515 // between the runtime calls. This avoids duplicating the body code. 
6516 if (Info.CaptureDeviceAddrMap.empty()) { 6517 CodeGen.setAction(NoPrivAction); 6518 CodeGen(CGF); 6519 } 6520 6521 if (IfCond) { 6522 emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen); 6523 } else { 6524 RegionCodeGenTy RCG(EndThenGen); 6525 RCG(CGF); 6526 } 6527 } 6528 6529 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 6530 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 6531 const Expr *Device) { 6532 if (!CGF.HaveInsertPoint()) 6533 return; 6534 6535 assert((isa<OMPTargetEnterDataDirective>(D) || 6536 isa<OMPTargetExitDataDirective>(D) || 6537 isa<OMPTargetUpdateDirective>(D)) && 6538 "Expecting either target enter, exit data, or update directives."); 6539 6540 // Generate the code for the opening of the data environment. 6541 auto &&ThenGen = [&D, Device](CodeGenFunction &CGF, PrePostActionTy &) { 6542 // Fill up the arrays with all the mapped variables. 6543 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 6544 MappableExprsHandler::MapValuesArrayTy Pointers; 6545 MappableExprsHandler::MapValuesArrayTy Sizes; 6546 MappableExprsHandler::MapFlagsArrayTy MapTypes; 6547 6548 // Get map clause information. 6549 MappableExprsHandler MEHandler(D, CGF); 6550 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 6551 6552 // Fill up the arrays and create the arguments. 6553 TargetDataInfo Info; 6554 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 6555 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 6556 Info.PointersArray, Info.SizesArray, 6557 Info.MapTypesArray, Info); 6558 6559 // Emit device ID if any. 6560 llvm::Value *DeviceID = nullptr; 6561 if (Device) 6562 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 6563 CGF.Int32Ty, /*isSigned=*/true); 6564 else 6565 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 6566 6567 // Emit the number of elements in the offloading arrays. 
6568 auto *PointerNum = CGF.Builder.getInt32(BasePointers.size()); 6569 6570 llvm::Value *OffloadingArgs[] = { 6571 DeviceID, PointerNum, Info.BasePointersArray, 6572 Info.PointersArray, Info.SizesArray, Info.MapTypesArray}; 6573 6574 auto &RT = CGF.CGM.getOpenMPRuntime(); 6575 // Select the right runtime function call for each expected standalone 6576 // directive. 6577 OpenMPRTLFunction RTLFn; 6578 switch (D.getDirectiveKind()) { 6579 default: 6580 llvm_unreachable("Unexpected standalone target data directive."); 6581 break; 6582 case OMPD_target_enter_data: 6583 RTLFn = OMPRTL__tgt_target_data_begin; 6584 break; 6585 case OMPD_target_exit_data: 6586 RTLFn = OMPRTL__tgt_target_data_end; 6587 break; 6588 case OMPD_target_update: 6589 RTLFn = OMPRTL__tgt_target_data_update; 6590 break; 6591 } 6592 CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs); 6593 }; 6594 6595 // In the event we get an if clause, we don't have to take any action on the 6596 // else side. 6597 auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 6598 6599 if (IfCond) { 6600 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 6601 } else { 6602 RegionCodeGenTy ThenGenRCG(ThenGen); 6603 ThenGenRCG(CGF); 6604 } 6605 } 6606 6607 namespace { 6608 /// Kind of parameter in a function with 'declare simd' directive. 6609 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 6610 /// Attribute set of the parameter. 6611 struct ParamAttrTy { 6612 ParamKindTy Kind = Vector; 6613 llvm::APSInt StrideOrArg; 6614 llvm::APSInt Alignment; 6615 }; 6616 } // namespace 6617 6618 static unsigned evaluateCDTSize(const FunctionDecl *FD, 6619 ArrayRef<ParamAttrTy> ParamAttrs) { 6620 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 6621 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 6622 // of that clause. The VLEN value must be power of 2. 
6623 // In other case the notion of the function`s "characteristic data type" (CDT) 6624 // is used to compute the vector length. 6625 // CDT is defined in the following order: 6626 // a) For non-void function, the CDT is the return type. 6627 // b) If the function has any non-uniform, non-linear parameters, then the 6628 // CDT is the type of the first such parameter. 6629 // c) If the CDT determined by a) or b) above is struct, union, or class 6630 // type which is pass-by-value (except for the type that maps to the 6631 // built-in complex data type), the characteristic data type is int. 6632 // d) If none of the above three cases is applicable, the CDT is int. 6633 // The VLEN is then determined based on the CDT and the size of vector 6634 // register of that ISA for which current vector version is generated. The 6635 // VLEN is computed using the formula below: 6636 // VLEN = sizeof(vector_register) / sizeof(CDT), 6637 // where vector register size specified in section 3.2.1 Registers and the 6638 // Stack Frame of original AMD64 ABI document. 
6639 QualType RetType = FD->getReturnType(); 6640 if (RetType.isNull()) 6641 return 0; 6642 ASTContext &C = FD->getASTContext(); 6643 QualType CDT; 6644 if (!RetType.isNull() && !RetType->isVoidType()) 6645 CDT = RetType; 6646 else { 6647 unsigned Offset = 0; 6648 if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 6649 if (ParamAttrs[Offset].Kind == Vector) 6650 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 6651 ++Offset; 6652 } 6653 if (CDT.isNull()) { 6654 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 6655 if (ParamAttrs[I + Offset].Kind == Vector) { 6656 CDT = FD->getParamDecl(I)->getType(); 6657 break; 6658 } 6659 } 6660 } 6661 } 6662 if (CDT.isNull()) 6663 CDT = C.IntTy; 6664 CDT = CDT->getCanonicalTypeUnqualified(); 6665 if (CDT->isRecordType() || CDT->isUnionType()) 6666 CDT = C.IntTy; 6667 return C.getTypeSize(CDT); 6668 } 6669 6670 static void 6671 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 6672 const llvm::APSInt &VLENVal, 6673 ArrayRef<ParamAttrTy> ParamAttrs, 6674 OMPDeclareSimdDeclAttr::BranchStateTy State) { 6675 struct ISADataTy { 6676 char ISA; 6677 unsigned VecRegSize; 6678 }; 6679 ISADataTy ISAData[] = { 6680 { 6681 'b', 128 6682 }, // SSE 6683 { 6684 'c', 256 6685 }, // AVX 6686 { 6687 'd', 256 6688 }, // AVX2 6689 { 6690 'e', 512 6691 }, // AVX512 6692 }; 6693 llvm::SmallVector<char, 2> Masked; 6694 switch (State) { 6695 case OMPDeclareSimdDeclAttr::BS_Undefined: 6696 Masked.push_back('N'); 6697 Masked.push_back('M'); 6698 break; 6699 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 6700 Masked.push_back('N'); 6701 break; 6702 case OMPDeclareSimdDeclAttr::BS_Inbranch: 6703 Masked.push_back('M'); 6704 break; 6705 } 6706 for (auto Mask : Masked) { 6707 for (auto &Data : ISAData) { 6708 SmallString<256> Buffer; 6709 llvm::raw_svector_ostream Out(Buffer); 6710 Out << "_ZGV" << Data.ISA << Mask; 6711 if (!VLENVal) { 6712 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / 6713 evaluateCDTSize(FD, 
ParamAttrs)); 6714 } else 6715 Out << VLENVal; 6716 for (auto &ParamAttr : ParamAttrs) { 6717 switch (ParamAttr.Kind){ 6718 case LinearWithVarStride: 6719 Out << 's' << ParamAttr.StrideOrArg; 6720 break; 6721 case Linear: 6722 Out << 'l'; 6723 if (!!ParamAttr.StrideOrArg) 6724 Out << ParamAttr.StrideOrArg; 6725 break; 6726 case Uniform: 6727 Out << 'u'; 6728 break; 6729 case Vector: 6730 Out << 'v'; 6731 break; 6732 } 6733 if (!!ParamAttr.Alignment) 6734 Out << 'a' << ParamAttr.Alignment; 6735 } 6736 Out << '_' << Fn->getName(); 6737 Fn->addFnAttr(Out.str()); 6738 } 6739 } 6740 } 6741 6742 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 6743 llvm::Function *Fn) { 6744 ASTContext &C = CGM.getContext(); 6745 FD = FD->getCanonicalDecl(); 6746 // Map params to their positions in function decl. 6747 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 6748 if (isa<CXXMethodDecl>(FD)) 6749 ParamPositions.insert({FD, 0}); 6750 unsigned ParamPos = ParamPositions.size(); 6751 for (auto *P : FD->parameters()) { 6752 ParamPositions.insert({P->getCanonicalDecl(), ParamPos}); 6753 ++ParamPos; 6754 } 6755 for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 6756 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 6757 // Mark uniform parameters. 6758 for (auto *E : Attr->uniforms()) { 6759 E = E->IgnoreParenImpCasts(); 6760 unsigned Pos; 6761 if (isa<CXXThisExpr>(E)) 6762 Pos = ParamPositions[FD]; 6763 else { 6764 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 6765 ->getCanonicalDecl(); 6766 Pos = ParamPositions[PVD]; 6767 } 6768 ParamAttrs[Pos].Kind = Uniform; 6769 } 6770 // Get alignment info. 
6771 auto NI = Attr->alignments_begin(); 6772 for (auto *E : Attr->aligneds()) { 6773 E = E->IgnoreParenImpCasts(); 6774 unsigned Pos; 6775 QualType ParmTy; 6776 if (isa<CXXThisExpr>(E)) { 6777 Pos = ParamPositions[FD]; 6778 ParmTy = E->getType(); 6779 } else { 6780 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 6781 ->getCanonicalDecl(); 6782 Pos = ParamPositions[PVD]; 6783 ParmTy = PVD->getType(); 6784 } 6785 ParamAttrs[Pos].Alignment = 6786 (*NI) ? (*NI)->EvaluateKnownConstInt(C) 6787 : llvm::APSInt::getUnsigned( 6788 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 6789 .getQuantity()); 6790 ++NI; 6791 } 6792 // Mark linear parameters. 6793 auto SI = Attr->steps_begin(); 6794 auto MI = Attr->modifiers_begin(); 6795 for (auto *E : Attr->linears()) { 6796 E = E->IgnoreParenImpCasts(); 6797 unsigned Pos; 6798 if (isa<CXXThisExpr>(E)) 6799 Pos = ParamPositions[FD]; 6800 else { 6801 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 6802 ->getCanonicalDecl(); 6803 Pos = ParamPositions[PVD]; 6804 } 6805 auto &ParamAttr = ParamAttrs[Pos]; 6806 ParamAttr.Kind = Linear; 6807 if (*SI) { 6808 if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C, 6809 Expr::SE_AllowSideEffects)) { 6810 if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 6811 if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 6812 ParamAttr.Kind = LinearWithVarStride; 6813 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 6814 ParamPositions[StridePVD->getCanonicalDecl()]); 6815 } 6816 } 6817 } 6818 } 6819 ++SI; 6820 ++MI; 6821 } 6822 llvm::APSInt VLENVal; 6823 if (const Expr *VLEN = Attr->getSimdlen()) 6824 VLENVal = VLEN->EvaluateKnownConstInt(C); 6825 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 6826 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 6827 CGM.getTriple().getArch() == llvm::Triple::x86_64) 6828 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 6829 } 6830 } 6831 6832 namespace { 6833 /// 
Cleanup action for doacross support. 6834 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 6835 public: 6836 static const int DoacrossFinArgs = 2; 6837 6838 private: 6839 llvm::Value *RTLFn; 6840 llvm::Value *Args[DoacrossFinArgs]; 6841 6842 public: 6843 DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs) 6844 : RTLFn(RTLFn) { 6845 assert(CallArgs.size() == DoacrossFinArgs); 6846 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 6847 } 6848 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 6849 if (!CGF.HaveInsertPoint()) 6850 return; 6851 CGF.EmitRuntimeCall(RTLFn, Args); 6852 } 6853 }; 6854 } // namespace 6855 6856 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 6857 const OMPLoopDirective &D) { 6858 if (!CGF.HaveInsertPoint()) 6859 return; 6860 6861 ASTContext &C = CGM.getContext(); 6862 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 6863 RecordDecl *RD; 6864 if (KmpDimTy.isNull()) { 6865 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 6866 // kmp_int64 lo; // lower 6867 // kmp_int64 up; // upper 6868 // kmp_int64 st; // stride 6869 // }; 6870 RD = C.buildImplicitRecord("kmp_dim"); 6871 RD->startDefinition(); 6872 addFieldToRecordDecl(C, RD, Int64Ty); 6873 addFieldToRecordDecl(C, RD, Int64Ty); 6874 addFieldToRecordDecl(C, RD, Int64Ty); 6875 RD->completeDefinition(); 6876 KmpDimTy = C.getRecordType(RD); 6877 } else 6878 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 6879 6880 Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims"); 6881 CGF.EmitNullInitialization(DimsAddr, KmpDimTy); 6882 enum { LowerFD = 0, UpperFD, StrideFD }; 6883 // Fill dims with data. 
6884 LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy); 6885 // dims.upper = num_iterations; 6886 LValue UpperLVal = 6887 CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD)); 6888 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 6889 CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(), 6890 Int64Ty, D.getNumIterations()->getExprLoc()); 6891 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 6892 // dims.stride = 1; 6893 LValue StrideLVal = 6894 CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD)); 6895 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 6896 StrideLVal); 6897 6898 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 6899 // kmp_int32 num_dims, struct kmp_dim * dims); 6900 llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()), 6901 getThreadID(CGF, D.getLocStart()), 6902 llvm::ConstantInt::getSigned(CGM.Int32Ty, 1), 6903 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6904 DimsAddr.getPointer(), CGM.VoidPtrTy)}; 6905 6906 llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init); 6907 CGF.EmitRuntimeCall(RTLFn, Args); 6908 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 6909 emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())}; 6910 llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 6911 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 6912 llvm::makeArrayRef(FiniArgs)); 6913 } 6914 6915 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 6916 const OMPDependClause *C) { 6917 QualType Int64Ty = 6918 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 6919 const Expr *CounterVal = C->getCounterValue(); 6920 assert(CounterVal); 6921 llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal), 6922 CounterVal->getType(), Int64Ty, 6923 CounterVal->getExprLoc()); 6924 Address CntAddr = 
CGF.CreateMemTemp(Int64Ty, ".cnt.addr"); 6925 CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty); 6926 llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()), 6927 getThreadID(CGF, C->getLocStart()), 6928 CntAddr.getPointer()}; 6929 llvm::Value *RTLFn; 6930 if (C->getDependencyKind() == OMPC_DEPEND_source) 6931 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 6932 else { 6933 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 6934 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 6935 } 6936 CGF.EmitRuntimeCall(RTLFn, Args); 6937 } 6938 6939