//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;

namespace {
/// \brief Base class for handling code generation inside OpenMP regions.
///
/// Stores the region kind, the code generation callback for the region, the
/// directive kind and the HasCancel flag. Derived classes supply the thread id
/// variable through getThreadIDVariable().
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// \brief Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// \brief Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// \brief Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// \brief Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// \brief Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// \brief Creates region info bound to the captured statement \p CS.
  /// The captured-statement kind passed to the base class is always CR_OpenMP.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// \brief Creates region info that is not bound to a captured statement.
  /// The captured-statement kind passed to the base class is always CR_OpenMP.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// \brief Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// \brief Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// \brief Hook for emitting a switching point of an untied task. The base
  /// implementation does nothing.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// \brief Get the region kind given at construction.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// \brief Get the directive kind given at construction.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// \brief Get the HasCancel flag given at construction.
  bool hasCancel() const { return HasCancel; }

  /// \brief Matches every captured statement info of kind CR_OpenMP.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of this region; used by the classof() of derived classes.
  CGOpenMPRegionKind RegionKind;
  /// Code generation callback invoked by EmitBody().
  RegionCodeGenTy CodeGen;
  /// Directive kind returned by getDirectiveKind().
  OpenMPDirectiveKind Kind;
  /// Flag returned by hasCancel().
  bool HasCancel;
};

/// \brief API for captured statement code generation in OpenMP constructs.
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 99 public: 100 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 101 const RegionCodeGenTy &CodeGen, 102 OpenMPDirectiveKind Kind, bool HasCancel, 103 StringRef HelperName) 104 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 105 HasCancel), 106 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 107 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 108 } 109 110 /// \brief Get a variable or parameter for storing global thread id 111 /// inside OpenMP construct. 112 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 113 114 /// \brief Get the name of the capture helper. 115 StringRef getHelperName() const override { return HelperName; } 116 117 static bool classof(const CGCapturedStmtInfo *Info) { 118 return CGOpenMPRegionInfo::classof(Info) && 119 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 120 ParallelOutlinedRegion; 121 } 122 123 private: 124 /// \brief A variable or parameter storing global thread id for OpenMP 125 /// constructs. 126 const VarDecl *ThreadIDVar; 127 StringRef HelperName; 128 }; 129 130 /// \brief API for captured statement code generation in OpenMP constructs. 131 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 132 public: 133 class UntiedTaskActionTy final : public PrePostActionTy { 134 bool Untied; 135 const VarDecl *PartIDVar; 136 const RegionCodeGenTy UntiedCodeGen; 137 llvm::SwitchInst *UntiedSwitch = nullptr; 138 139 public: 140 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 141 const RegionCodeGenTy &UntiedCodeGen) 142 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 143 void Enter(CodeGenFunction &CGF) override { 144 if (Untied) { 145 // Emit task switching point. 
146 auto PartIdLVal = CGF.EmitLoadOfPointerLValue( 147 CGF.GetAddrOfLocalVar(PartIDVar), 148 PartIDVar->getType()->castAs<PointerType>()); 149 auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation()); 150 auto *DoneBB = CGF.createBasicBlock(".untied.done."); 151 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 152 CGF.EmitBlock(DoneBB); 153 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 154 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 155 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 156 CGF.Builder.GetInsertBlock()); 157 emitUntiedSwitch(CGF); 158 } 159 } 160 void emitUntiedSwitch(CodeGenFunction &CGF) const { 161 if (Untied) { 162 auto PartIdLVal = CGF.EmitLoadOfPointerLValue( 163 CGF.GetAddrOfLocalVar(PartIDVar), 164 PartIDVar->getType()->castAs<PointerType>()); 165 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 166 PartIdLVal); 167 UntiedCodeGen(CGF); 168 CodeGenFunction::JumpDest CurPoint = 169 CGF.getJumpDestInCurrentScope(".untied.next."); 170 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 171 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 172 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 173 CGF.Builder.GetInsertBlock()); 174 CGF.EmitBranchThroughCleanup(CurPoint); 175 CGF.EmitBlock(CurPoint.getBlock()); 176 } 177 } 178 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } 179 }; 180 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 181 const VarDecl *ThreadIDVar, 182 const RegionCodeGenTy &CodeGen, 183 OpenMPDirectiveKind Kind, bool HasCancel, 184 const UntiedTaskActionTy &Action) 185 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 186 ThreadIDVar(ThreadIDVar), Action(Action) { 187 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 188 } 189 190 /// \brief Get a variable or parameter for storing global thread id 191 /// inside OpenMP construct. 
192 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 193 194 /// \brief Get an LValue for the current ThreadID variable. 195 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 196 197 /// \brief Get the name of the capture helper. 198 StringRef getHelperName() const override { return ".omp_outlined."; } 199 200 void emitUntiedSwitch(CodeGenFunction &CGF) override { 201 Action.emitUntiedSwitch(CGF); 202 } 203 204 static bool classof(const CGCapturedStmtInfo *Info) { 205 return CGOpenMPRegionInfo::classof(Info) && 206 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 207 TaskOutlinedRegion; 208 } 209 210 private: 211 /// \brief A variable or parameter storing global thread id for OpenMP 212 /// constructs. 213 const VarDecl *ThreadIDVar; 214 /// Action for emitting code for untied tasks. 215 const UntiedTaskActionTy &Action; 216 }; 217 218 /// \brief API for inlined captured statement code generation in OpenMP 219 /// constructs. 220 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 221 public: 222 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 223 const RegionCodeGenTy &CodeGen, 224 OpenMPDirectiveKind Kind, bool HasCancel) 225 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 226 OldCSI(OldCSI), 227 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 228 229 // \brief Retrieve the value of the context parameter. 230 llvm::Value *getContextValue() const override { 231 if (OuterRegionInfo) 232 return OuterRegionInfo->getContextValue(); 233 llvm_unreachable("No context value for inlined OpenMP region"); 234 } 235 236 void setContextValue(llvm::Value *V) override { 237 if (OuterRegionInfo) { 238 OuterRegionInfo->setContextValue(V); 239 return; 240 } 241 llvm_unreachable("No context value for inlined OpenMP region"); 242 } 243 244 /// \brief Lookup the captured field decl for a variable. 
245 const FieldDecl *lookup(const VarDecl *VD) const override { 246 if (OuterRegionInfo) 247 return OuterRegionInfo->lookup(VD); 248 // If there is no outer outlined region,no need to lookup in a list of 249 // captured variables, we can use the original one. 250 return nullptr; 251 } 252 253 FieldDecl *getThisFieldDecl() const override { 254 if (OuterRegionInfo) 255 return OuterRegionInfo->getThisFieldDecl(); 256 return nullptr; 257 } 258 259 /// \brief Get a variable or parameter for storing global thread id 260 /// inside OpenMP construct. 261 const VarDecl *getThreadIDVariable() const override { 262 if (OuterRegionInfo) 263 return OuterRegionInfo->getThreadIDVariable(); 264 return nullptr; 265 } 266 267 /// \brief Get the name of the capture helper. 268 StringRef getHelperName() const override { 269 if (auto *OuterRegionInfo = getOldCSI()) 270 return OuterRegionInfo->getHelperName(); 271 llvm_unreachable("No helper name for inlined OpenMP construct"); 272 } 273 274 void emitUntiedSwitch(CodeGenFunction &CGF) override { 275 if (OuterRegionInfo) 276 OuterRegionInfo->emitUntiedSwitch(CGF); 277 } 278 279 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 280 281 static bool classof(const CGCapturedStmtInfo *Info) { 282 return CGOpenMPRegionInfo::classof(Info) && 283 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 284 } 285 286 ~CGOpenMPInlinedRegionInfo() override = default; 287 288 private: 289 /// \brief CodeGen info about outer OpenMP region. 290 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 291 CGOpenMPRegionInfo *OuterRegionInfo; 292 }; 293 294 /// \brief API for captured statement code generation in OpenMP target 295 /// constructs. For this captures, implicit parameters are used instead of the 296 /// captured fields. The name of the target region has to be unique in a given 297 /// application so it is provided by the client, because only the client has 298 /// the information to generate that. 
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \brief Creates region info of kind TargetRegion for directive OMPD_target
  /// with HasCancel set to false.
  /// \param HelperName Name returned by getHelperName().
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// \brief This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// \brief Matches OpenMP region infos whose kind is TargetRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Name of the capture helper, supplied at construction.
  StringRef HelperName;
};

/// Region code generator passed by CGOpenMPInnerExprInfo to its base class;
/// it must never be invoked.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// \brief API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  /// \brief Builds the info on top of the captured statement info currently
  /// installed in \p CGF and privatizes the non-local variables captured by
  /// \p CS.
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (auto &C : CS.captures()) {
      // Only variable captures (by reference or by copy) are of interest.
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Local variables and parameters are skipped.
      if (VD->isLocalVarDeclOrParm())
        continue;

      // NOTE(review): DRE lives only for this loop iteration and the lambda
      // captures it by reference; this presumably relies on addPrivate
      // invoking the callback before returning - confirm against
      // OMPPrivateScope::addPrivate.
      DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      SourceLocation());
      PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
        return CGF.EmitLValue(&DRE).getAddress();
      });
    }
    (void)PrivScope.Privatize();
  }

  /// \brief Lookup the captured field decl for a variable.
  /// Returns whatever the inlined-region lookup returns, null included.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// \brief Emit the captured statement body.
  /// Expressions have no body, so this must never be called.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  /// Must never be called for expressions.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// \brief Get the name of the capture helper.
  /// Must never be called for expressions.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  /// \brief Always returns false.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// \brief RAII for emitting code of OpenMP constructs.
///
/// The constructor installs a new CGOpenMPInlinedRegionInfo in the
/// CodeGenFunction and moves its lambda capture state aside; the destructor
/// deletes that info and restores the previous state.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Holds the CodeGenFunction's lambda capture map while the region is active.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  /// Holds the CodeGenFunction's lambda 'this' capture field while the region
  /// is active.
  FieldDecl *LambdaThisCaptureField = nullptr;

public:
  /// \brief Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    // Swap the function's lambda capture map with this object's (initially
    // empty) map and reset its 'this' capture field, remembering both.
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    // Put the saved lambda capture state back.
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
  }
};

/// \brief Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
enum OpenMPLocationFlags {
  /// \brief Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// \brief Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// \brief Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// \brief Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// \brief Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// \brief Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// \brief Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// \brief Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
};

/// \brief Describes ident structure that describes a source location.
/// All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// \brief might be used in Fortran
  IdentField_Reserved_1,
  /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// \brief Not really used in Fortran any more
  IdentField_Reserved_2,
  /// \brief Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// \brief String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// \brief Default schedule; an alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
  /// \brief dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// \brief Identifiers of the runtime library entry points referenced by this
/// file. The comment on each enumerator gives the signature of the
/// corresponding runtime function.
enum OpenMPRTLFunction {
  /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// \brief Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,

  //
  // Offloading related calls
  //
  // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
  // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_update,
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
///
/// As a cleanup, it calls the Exit() hook of the wrapped action when the
/// cleanup is emitted and there is still an insertion point.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

/// Runs the region callback inside its own cleanups scope. When a pre/post
/// action is attached, a CleanupTy wrapping it is pushed as a normal-and-EH
/// cleanup before the callback is invoked with that action; otherwise the
/// callback receives a default-constructed PrePostActionTy.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Default thread id lvalue: the thread id variable is treated as a pointer
/// and the lvalue is obtained by loading through it.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

/// Emits the region by running the stored code generation callback between
/// pushTerminate() and popTerminate() on the EH stack. Does nothing when
/// there is no insertion point.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

/// Thread id lvalue for task regions: formed directly from the address of the
/// thread id variable with the variable's own type, without loading through a
/// pointer.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Creates the LLVM types used for runtime calls (the "ident_t" struct of
/// four i32 fields followed by an i8* field, and the 8 x i32 array used for
/// kmp_critical_name) and then loads the offload info metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OffloadEntriesInfoManager(CGM) {
  IdentTy = llvm::StructType::create(
      "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
      CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
      CGM.Int8PtrTy /* psource */, nullptr);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}

/// Clears the InternalVars container.
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
}

/// Emits an internal, always-inline helper function that evaluates the
/// combiner or initializer expression of a user-defined reduction.
/// \param Ty Type of the reduction; both parameters of the helper are
/// restrict-qualified pointers to it.
/// \param CombinerInitializer Expression emitted as the body of the helper.
/// \param In Variable mapped to the pointee of the helper's second parameter.
/// \param Out Variable mapped to the pointee of the helper's first parameter.
/// \param IsCombiner Selects the helper name: ".omp_combiner." when true,
/// ".omp_initializer." otherwise.
/// \return The generated function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *omp_out_parm, Ty *omp_in_parm);
  // The out parameter is pushed first below, so it is the first argument.
  auto &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  auto *Fn = llvm::Function::Create(
      FnTy, llvm::GlobalValue::InternalLinkage,
      IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
  // Replace the NoInline attribute with AlwaysInline.
  Fn->removeFnAttr(llvm::Attribute::NoInline);
  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits the combiner and, if the declaration has one, the initializer of the
/// user-defined reduction \p D and records them in UDRMap. Does nothing when
/// \p D is already in the map. When \p CGF is non-null, \p D is also appended
/// to the FunctionUDRMap entry of the function currently being emitted.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  auto &C = CGM.getContext();
  // The identifiers are looked up once and cached in members.
  if (!In || !Out) {
    In = &C.Idents.get("omp_in");
    Out = &C.Idents.get("omp_out");
  }
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
      cast<VarDecl>(D->lookup(Out).front()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (auto *Init = D->getInitializer()) {
    if (!Priv || !Orig) {
      Priv = &C.Idents.get("omp_priv");
      Orig = &C.Idents.get("omp_orig");
    }
    // For the initializer, omp_orig takes the 'In' role and omp_priv the
    // 'Out' role.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()),
        cast<VarDecl>(D->lookup(Priv).front()),
        /*IsCombiner=*/false);
  }
  UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Returns the (combiner, initializer) pair recorded for \p D, emitting the
/// functions first (without an associated CodeGenFunction) if \p D has no
/// entry in UDRMap yet.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

// Layout information for ident_t.

/// Alignment of ident_t: the pointer alignment of the module.
static CharUnits getIdentAlign(CodeGenModule &CGM) {
  return CGM.getPointerAlign();
}
/// Size of ident_t: 16 bytes plus the pointer size of the module.
static CharUnits getIdentSize(CodeGenModule &CGM) {
  // NOTE(review): the assert tests 4 * pointer-size, while the size below uses
  // a fixed 16-byte prefix; confirm which quantity was meant to be checked
  // against the pointer alignment.
  assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
  return CharUnits::fromQuantity(16) + CGM.getPointerSize();
}
/// Byte offset of \p Field inside ident_t: the field index times 4.
static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
  // All the fields except the last are i32, so this works beautifully.
  return unsigned(Field) * CharUnits::fromQuantity(4);
}
/// Creates a struct GEP to field \p Field of the ident_t object at \p Addr,
/// passing the field's byte offset and the optional value name \p Name.
static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
                                   IdentFieldIndex Field,
                                   const llvm::Twine &Name = "") {
  auto Offset = getOffsetOfIdentField(Field);
  return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
}

/// Generates the outlined function for the captured statement \p CS of
/// directive \p D using a fresh CodeGenFunction.
/// \param ThreadIDVar Thread id variable; its type must be a pointer type.
/// \param InnermostKind Directive kind stored in the region info.
/// \param OutlinedHelperName Helper name stored in the region info.
/// \param CodeGen Code generation callback stored in the region info.
/// The HasCancel flag is taken from \p D when it is a 'parallel',
/// 'parallel sections' or 'parallel for' directive and is false otherwise.
static llvm::Value *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}

/// Generates the outlined function for the OMPD_parallel captured statement of
/// \p D, named by getOutlinedHelperName().
llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return
emitParallelOrTeamsOutlinedFunction( 878 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 879 } 880 881 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( 882 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 883 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 884 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 885 bool Tied, unsigned &NumberOfParts) { 886 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 887 PrePostActionTy &) { 888 auto *ThreadID = getThreadID(CGF, D.getLocStart()); 889 auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart()); 890 llvm::Value *TaskArgs[] = { 891 UpLoc, ThreadID, 892 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 893 TaskTVar->getType()->castAs<PointerType>()) 894 .getPointer()}; 895 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 896 }; 897 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 898 UntiedCodeGen); 899 CodeGen.setAction(Action); 900 assert(!ThreadIDVar->getType()->isPointerType() && 901 "thread id variable must be of type kmp_int32 for tasks"); 902 auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 903 auto *TD = dyn_cast<OMPTaskDirective>(&D); 904 CodeGenFunction CGF(CGM, true); 905 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 906 InnermostKind, 907 TD ? TD->hasCancel() : false, Action); 908 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 909 auto *Res = CGF.GenerateCapturedStmtFunction(*CS); 910 if (!Tied) 911 NumberOfParts = Action.getNumberOfParts(); 912 return Res; 913 } 914 915 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 916 CharUnits Align = getIdentAlign(CGM); 917 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); 918 if (!Entry) { 919 if (!DefaultOpenMPPSource) { 920 // Initialize default location for psource field of ident_t structure of 921 // all ident_t objects. 
Format is ";file;function;line;column;;". 922 // Taken from 923 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c 924 DefaultOpenMPPSource = 925 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 926 DefaultOpenMPPSource = 927 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 928 } 929 930 ConstantInitBuilder builder(CGM); 931 auto fields = builder.beginStruct(IdentTy); 932 fields.addInt(CGM.Int32Ty, 0); 933 fields.addInt(CGM.Int32Ty, Flags); 934 fields.addInt(CGM.Int32Ty, 0); 935 fields.addInt(CGM.Int32Ty, 0); 936 fields.add(DefaultOpenMPPSource); 937 auto DefaultOpenMPLocation = 938 fields.finishAndCreateGlobal("", Align, /*isConstant*/ true, 939 llvm::GlobalValue::PrivateLinkage); 940 DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 941 942 OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation; 943 } 944 return Address(Entry, Align); 945 } 946 947 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 948 SourceLocation Loc, 949 unsigned Flags) { 950 Flags |= OMP_IDENT_KMPC; 951 // If no debug info is generated - return global default location. 952 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 953 Loc.isInvalid()) 954 return getOrCreateDefaultLocation(Flags).getPointer(); 955 956 assert(CGF.CurFn && "No function in current CodeGenFunction."); 957 958 Address LocValue = Address::invalid(); 959 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 960 if (I != OpenMPLocThreadIDMap.end()) 961 LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM)); 962 963 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 964 // GetOpenMPThreadID was called before this routine. 
965 if (!LocValue.isValid()) { 966 // Generate "ident_t .kmpc_loc.addr;" 967 Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM), 968 ".kmpc_loc.addr"); 969 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 970 Elem.second.DebugLoc = AI.getPointer(); 971 LocValue = AI; 972 973 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 974 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 975 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 976 CGM.getSize(getIdentSize(CGF.CGM))); 977 } 978 979 // char **psource = &.kmpc_loc_<flags>.addr.psource; 980 Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource); 981 982 auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 983 if (OMPDebugLoc == nullptr) { 984 SmallString<128> Buffer2; 985 llvm::raw_svector_ostream OS2(Buffer2); 986 // Build debug location 987 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 988 OS2 << ";" << PLoc.getFilename() << ";"; 989 if (const FunctionDecl *FD = 990 dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) { 991 OS2 << FD->getQualifiedNameAsString(); 992 } 993 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 994 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 995 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 996 } 997 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 998 CGF.Builder.CreateStore(OMPDebugLoc, PSource); 999 1000 // Our callers always pass this to a runtime function, so for 1001 // convenience, go ahead and return a naked pointer. 1002 return LocValue.getPointer(); 1003 } 1004 1005 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1006 SourceLocation Loc) { 1007 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1008 1009 llvm::Value *ThreadID = nullptr; 1010 // Check whether we've already cached a load of the thread id in this 1011 // function. 
1012 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1013 if (I != OpenMPLocThreadIDMap.end()) { 1014 ThreadID = I->second.ThreadID; 1015 if (ThreadID != nullptr) 1016 return ThreadID; 1017 } 1018 if (auto *OMPRegionInfo = 1019 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1020 if (OMPRegionInfo->getThreadIDVariable()) { 1021 // Check if this an outlined function with thread id passed as argument. 1022 auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1023 ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); 1024 // If value loaded in entry block, cache it and use it everywhere in 1025 // function. 1026 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 1027 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1028 Elem.second.ThreadID = ThreadID; 1029 } 1030 return ThreadID; 1031 } 1032 } 1033 1034 // This is not an outlined function region - need to call __kmpc_int32 1035 // kmpc_global_thread_num(ident_t *loc). 1036 // Generate thread id value and cache this value for use across the 1037 // function. 
1038 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1039 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 1040 ThreadID = 1041 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1042 emitUpdateLocation(CGF, Loc)); 1043 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1044 Elem.second.ThreadID = ThreadID; 1045 return ThreadID; 1046 } 1047 1048 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1049 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1050 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 1051 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1052 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1053 for(auto *D : FunctionUDRMap[CGF.CurFn]) { 1054 UDRMap.erase(D); 1055 } 1056 FunctionUDRMap.erase(CGF.CurFn); 1057 } 1058 } 1059 1060 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1061 if (!IdentTy) { 1062 } 1063 return llvm::PointerType::getUnqual(IdentTy); 1064 } 1065 1066 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1067 if (!Kmpc_MicroTy) { 1068 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1069 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1070 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1071 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1072 } 1073 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1074 } 1075 1076 llvm::Constant * 1077 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1078 llvm::Constant *RTLFn = nullptr; 1079 switch (static_cast<OpenMPRTLFunction>(Function)) { 1080 case OMPRTL__kmpc_fork_call: { 1081 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1082 // microtask, ...); 1083 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1084 getKmpc_MicroPointerTy()}; 1085 llvm::FunctionType *FnTy = 1086 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1087 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1088 break; 1089 } 1090 case OMPRTL__kmpc_global_thread_num: { 1091 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1092 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1093 llvm::FunctionType *FnTy = 1094 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1095 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1096 break; 1097 } 1098 case OMPRTL__kmpc_threadprivate_cached: { 1099 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1100 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1101 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1102 CGM.VoidPtrTy, CGM.SizeTy, 1103 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1104 llvm::FunctionType *FnTy = 1105 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1106 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1107 break; 1108 } 1109 case OMPRTL__kmpc_critical: { 1110 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1111 // kmp_critical_name *crit); 1112 llvm::Type *TypeParams[] = { 1113 getIdentTyPointerTy(), 
CGM.Int32Ty, 1114 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1115 llvm::FunctionType *FnTy = 1116 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1117 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1118 break; 1119 } 1120 case OMPRTL__kmpc_critical_with_hint: { 1121 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1122 // kmp_critical_name *crit, uintptr_t hint); 1123 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1124 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1125 CGM.IntPtrTy}; 1126 llvm::FunctionType *FnTy = 1127 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1128 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); 1129 break; 1130 } 1131 case OMPRTL__kmpc_threadprivate_register: { 1132 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1133 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1134 // typedef void *(*kmpc_ctor)(void *); 1135 auto KmpcCtorTy = 1136 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1137 /*isVarArg*/ false)->getPointerTo(); 1138 // typedef void *(*kmpc_cctor)(void *, void *); 1139 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1140 auto KmpcCopyCtorTy = 1141 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1142 /*isVarArg*/ false)->getPointerTo(); 1143 // typedef void (*kmpc_dtor)(void *); 1144 auto KmpcDtorTy = 1145 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1146 ->getPointerTo(); 1147 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1148 KmpcCopyCtorTy, KmpcDtorTy}; 1149 auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1150 /*isVarArg*/ false); 1151 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1152 break; 1153 } 1154 case OMPRTL__kmpc_end_critical: { 1155 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1156 // kmp_critical_name *crit); 
1157 llvm::Type *TypeParams[] = { 1158 getIdentTyPointerTy(), CGM.Int32Ty, 1159 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1160 llvm::FunctionType *FnTy = 1161 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1162 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1163 break; 1164 } 1165 case OMPRTL__kmpc_cancel_barrier: { 1166 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1167 // global_tid); 1168 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1169 llvm::FunctionType *FnTy = 1170 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1171 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1172 break; 1173 } 1174 case OMPRTL__kmpc_barrier: { 1175 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1176 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1177 llvm::FunctionType *FnTy = 1178 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1179 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1180 break; 1181 } 1182 case OMPRTL__kmpc_for_static_fini: { 1183 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1184 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1185 llvm::FunctionType *FnTy = 1186 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1187 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1188 break; 1189 } 1190 case OMPRTL__kmpc_push_num_threads: { 1191 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1192 // kmp_int32 num_threads) 1193 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1194 CGM.Int32Ty}; 1195 llvm::FunctionType *FnTy = 1196 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1197 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1198 break; 1199 } 1200 case OMPRTL__kmpc_serialized_parallel: { 1201 // Build void 
__kmpc_serialized_parallel(ident_t *loc, kmp_int32 1202 // global_tid); 1203 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1204 llvm::FunctionType *FnTy = 1205 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1206 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1207 break; 1208 } 1209 case OMPRTL__kmpc_end_serialized_parallel: { 1210 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1211 // global_tid); 1212 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1213 llvm::FunctionType *FnTy = 1214 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1215 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1216 break; 1217 } 1218 case OMPRTL__kmpc_flush: { 1219 // Build void __kmpc_flush(ident_t *loc); 1220 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1221 llvm::FunctionType *FnTy = 1222 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1223 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1224 break; 1225 } 1226 case OMPRTL__kmpc_master: { 1227 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1228 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1229 llvm::FunctionType *FnTy = 1230 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1231 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1232 break; 1233 } 1234 case OMPRTL__kmpc_end_master: { 1235 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1236 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1237 llvm::FunctionType *FnTy = 1238 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1239 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1240 break; 1241 } 1242 case OMPRTL__kmpc_omp_taskyield: { 1243 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1244 // int end_part); 1245 
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1246 llvm::FunctionType *FnTy = 1247 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1248 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1249 break; 1250 } 1251 case OMPRTL__kmpc_single: { 1252 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1253 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1254 llvm::FunctionType *FnTy = 1255 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1256 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1257 break; 1258 } 1259 case OMPRTL__kmpc_end_single: { 1260 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1261 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1262 llvm::FunctionType *FnTy = 1263 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1264 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1265 break; 1266 } 1267 case OMPRTL__kmpc_omp_task_alloc: { 1268 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1269 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1270 // kmp_routine_entry_t *task_entry); 1271 assert(KmpRoutineEntryPtrTy != nullptr && 1272 "Type kmp_routine_entry_t must be created."); 1273 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1274 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1275 // Return void * and then cast to particular kmp_task_t type. 
1276 llvm::FunctionType *FnTy = 1277 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1278 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1279 break; 1280 } 1281 case OMPRTL__kmpc_omp_task: { 1282 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1283 // *new_task); 1284 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1285 CGM.VoidPtrTy}; 1286 llvm::FunctionType *FnTy = 1287 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1288 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 1289 break; 1290 } 1291 case OMPRTL__kmpc_copyprivate: { 1292 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 1293 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 1294 // kmp_int32 didit); 1295 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1296 auto *CpyFnTy = 1297 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 1298 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 1299 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 1300 CGM.Int32Ty}; 1301 llvm::FunctionType *FnTy = 1302 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1303 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 1304 break; 1305 } 1306 case OMPRTL__kmpc_reduce: { 1307 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 1308 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 1309 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 1310 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1311 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1312 /*isVarArg=*/false); 1313 llvm::Type *TypeParams[] = { 1314 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1315 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1316 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1317 
llvm::FunctionType *FnTy = 1318 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1319 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 1320 break; 1321 } 1322 case OMPRTL__kmpc_reduce_nowait: { 1323 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 1324 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 1325 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 1326 // *lck); 1327 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1328 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1329 /*isVarArg=*/false); 1330 llvm::Type *TypeParams[] = { 1331 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1332 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1333 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1334 llvm::FunctionType *FnTy = 1335 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1336 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 1337 break; 1338 } 1339 case OMPRTL__kmpc_end_reduce: { 1340 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 1341 // kmp_critical_name *lck); 1342 llvm::Type *TypeParams[] = { 1343 getIdentTyPointerTy(), CGM.Int32Ty, 1344 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1345 llvm::FunctionType *FnTy = 1346 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1347 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 1348 break; 1349 } 1350 case OMPRTL__kmpc_end_reduce_nowait: { 1351 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 1352 // kmp_critical_name *lck); 1353 llvm::Type *TypeParams[] = { 1354 getIdentTyPointerTy(), CGM.Int32Ty, 1355 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1356 llvm::FunctionType *FnTy = 1357 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1358 RTLFn = 1359 CGM.CreateRuntimeFunction(FnTy, 
/*Name=*/"__kmpc_end_reduce_nowait"); 1360 break; 1361 } 1362 case OMPRTL__kmpc_omp_task_begin_if0: { 1363 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1364 // *new_task); 1365 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1366 CGM.VoidPtrTy}; 1367 llvm::FunctionType *FnTy = 1368 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1369 RTLFn = 1370 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 1371 break; 1372 } 1373 case OMPRTL__kmpc_omp_task_complete_if0: { 1374 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1375 // *new_task); 1376 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1377 CGM.VoidPtrTy}; 1378 llvm::FunctionType *FnTy = 1379 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1380 RTLFn = CGM.CreateRuntimeFunction(FnTy, 1381 /*Name=*/"__kmpc_omp_task_complete_if0"); 1382 break; 1383 } 1384 case OMPRTL__kmpc_ordered: { 1385 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 1386 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1387 llvm::FunctionType *FnTy = 1388 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1389 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 1390 break; 1391 } 1392 case OMPRTL__kmpc_end_ordered: { 1393 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 1394 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1395 llvm::FunctionType *FnTy = 1396 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1397 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 1398 break; 1399 } 1400 case OMPRTL__kmpc_omp_taskwait: { 1401 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 1402 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1403 llvm::FunctionType *FnTy = 1404 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1405 RTLFn = 
CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 1406 break; 1407 } 1408 case OMPRTL__kmpc_taskgroup: { 1409 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 1410 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1411 llvm::FunctionType *FnTy = 1412 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1413 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 1414 break; 1415 } 1416 case OMPRTL__kmpc_end_taskgroup: { 1417 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 1418 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1419 llvm::FunctionType *FnTy = 1420 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1421 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 1422 break; 1423 } 1424 case OMPRTL__kmpc_push_proc_bind: { 1425 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 1426 // int proc_bind) 1427 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1428 llvm::FunctionType *FnTy = 1429 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1430 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 1431 break; 1432 } 1433 case OMPRTL__kmpc_omp_task_with_deps: { 1434 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 1435 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 1436 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 1437 llvm::Type *TypeParams[] = { 1438 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 1439 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 1440 llvm::FunctionType *FnTy = 1441 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1442 RTLFn = 1443 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 1444 break; 1445 } 1446 case OMPRTL__kmpc_omp_wait_deps: { 1447 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 1448 // 
kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 1449 // kmp_depend_info_t *noalias_dep_list); 1450 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1451 CGM.Int32Ty, CGM.VoidPtrTy, 1452 CGM.Int32Ty, CGM.VoidPtrTy}; 1453 llvm::FunctionType *FnTy = 1454 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1455 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 1456 break; 1457 } 1458 case OMPRTL__kmpc_cancellationpoint: { 1459 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 1460 // global_tid, kmp_int32 cncl_kind) 1461 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1462 llvm::FunctionType *FnTy = 1463 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1464 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 1465 break; 1466 } 1467 case OMPRTL__kmpc_cancel: { 1468 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 1469 // kmp_int32 cncl_kind) 1470 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1471 llvm::FunctionType *FnTy = 1472 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1473 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 1474 break; 1475 } 1476 case OMPRTL__kmpc_push_num_teams: { 1477 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 1478 // kmp_int32 num_teams, kmp_int32 num_threads) 1479 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1480 CGM.Int32Ty}; 1481 llvm::FunctionType *FnTy = 1482 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1483 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 1484 break; 1485 } 1486 case OMPRTL__kmpc_fork_teams: { 1487 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 1488 // microtask, ...); 1489 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1490 
getKmpc_MicroPointerTy()}; 1491 llvm::FunctionType *FnTy = 1492 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1493 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 1494 break; 1495 } 1496 case OMPRTL__kmpc_taskloop: { 1497 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 1498 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 1499 // sched, kmp_uint64 grainsize, void *task_dup); 1500 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 1501 CGM.IntTy, 1502 CGM.VoidPtrTy, 1503 CGM.IntTy, 1504 CGM.Int64Ty->getPointerTo(), 1505 CGM.Int64Ty->getPointerTo(), 1506 CGM.Int64Ty, 1507 CGM.IntTy, 1508 CGM.IntTy, 1509 CGM.Int64Ty, 1510 CGM.VoidPtrTy}; 1511 llvm::FunctionType *FnTy = 1512 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1513 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 1514 break; 1515 } 1516 case OMPRTL__kmpc_doacross_init: { 1517 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 1518 // num_dims, struct kmp_dim *dims); 1519 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 1520 CGM.Int32Ty, 1521 CGM.Int32Ty, 1522 CGM.VoidPtrTy}; 1523 llvm::FunctionType *FnTy = 1524 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1525 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 1526 break; 1527 } 1528 case OMPRTL__kmpc_doacross_fini: { 1529 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 1530 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1531 llvm::FunctionType *FnTy = 1532 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1533 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 1534 break; 1535 } 1536 case OMPRTL__kmpc_doacross_post: { 1537 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 1538 // *vec); 1539 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 
1540 CGM.Int64Ty->getPointerTo()}; 1541 llvm::FunctionType *FnTy = 1542 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1543 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 1544 break; 1545 } 1546 case OMPRTL__kmpc_doacross_wait: { 1547 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 1548 // *vec); 1549 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1550 CGM.Int64Ty->getPointerTo()}; 1551 llvm::FunctionType *FnTy = 1552 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1553 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 1554 break; 1555 } 1556 case OMPRTL__tgt_target: { 1557 // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t 1558 // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t 1559 // *arg_types); 1560 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1561 CGM.VoidPtrTy, 1562 CGM.Int32Ty, 1563 CGM.VoidPtrPtrTy, 1564 CGM.VoidPtrPtrTy, 1565 CGM.SizeTy->getPointerTo(), 1566 CGM.Int32Ty->getPointerTo()}; 1567 llvm::FunctionType *FnTy = 1568 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1569 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 1570 break; 1571 } 1572 case OMPRTL__tgt_target_teams: { 1573 // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, 1574 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 1575 // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); 1576 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1577 CGM.VoidPtrTy, 1578 CGM.Int32Ty, 1579 CGM.VoidPtrPtrTy, 1580 CGM.VoidPtrPtrTy, 1581 CGM.SizeTy->getPointerTo(), 1582 CGM.Int32Ty->getPointerTo(), 1583 CGM.Int32Ty, 1584 CGM.Int32Ty}; 1585 llvm::FunctionType *FnTy = 1586 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1587 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 1588 break; 1589 } 1590 case OMPRTL__tgt_register_lib: { 
1591 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 1592 QualType ParamTy = 1593 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 1594 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 1595 llvm::FunctionType *FnTy = 1596 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1597 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 1598 break; 1599 } 1600 case OMPRTL__tgt_unregister_lib: { 1601 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 1602 QualType ParamTy = 1603 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 1604 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 1605 llvm::FunctionType *FnTy = 1606 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1607 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 1608 break; 1609 } 1610 case OMPRTL__tgt_target_data_begin: { 1611 // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, 1612 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 1613 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1614 CGM.Int32Ty, 1615 CGM.VoidPtrPtrTy, 1616 CGM.VoidPtrPtrTy, 1617 CGM.SizeTy->getPointerTo(), 1618 CGM.Int32Ty->getPointerTo()}; 1619 llvm::FunctionType *FnTy = 1620 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1621 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 1622 break; 1623 } 1624 case OMPRTL__tgt_target_data_end: { 1625 // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num, 1626 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 1627 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1628 CGM.Int32Ty, 1629 CGM.VoidPtrPtrTy, 1630 CGM.VoidPtrPtrTy, 1631 CGM.SizeTy->getPointerTo(), 1632 CGM.Int32Ty->getPointerTo()}; 1633 llvm::FunctionType *FnTy = 1634 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1635 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__tgt_target_data_end"); 1636 break; 1637 } 1638 case OMPRTL__tgt_target_data_update: { 1639 // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num, 1640 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 1641 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1642 CGM.Int32Ty, 1643 CGM.VoidPtrPtrTy, 1644 CGM.VoidPtrPtrTy, 1645 CGM.SizeTy->getPointerTo(), 1646 CGM.Int32Ty->getPointerTo()}; 1647 llvm::FunctionType *FnTy = 1648 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1649 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 1650 break; 1651 } 1652 } 1653 assert(RTLFn && "Unable to find OpenMP runtime function"); 1654 return RTLFn; 1655 } 1656 1657 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, 1658 bool IVSigned) { 1659 assert((IVSize == 32 || IVSize == 64) && 1660 "IV size is not compatible with the omp runtime"); 1661 auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1662 : "__kmpc_for_static_init_4u") 1663 : (IVSigned ? "__kmpc_for_static_init_8" 1664 : "__kmpc_for_static_init_8u"); 1665 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1666 auto PtrTy = llvm::PointerType::getUnqual(ITy); 1667 llvm::Type *TypeParams[] = { 1668 getIdentTyPointerTy(), // loc 1669 CGM.Int32Ty, // tid 1670 CGM.Int32Ty, // schedtype 1671 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1672 PtrTy, // p_lower 1673 PtrTy, // p_upper 1674 PtrTy, // p_stride 1675 ITy, // incr 1676 ITy // chunk 1677 }; 1678 llvm::FunctionType *FnTy = 1679 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1680 return CGM.CreateRuntimeFunction(FnTy, Name); 1681 } 1682 1683 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, 1684 bool IVSigned) { 1685 assert((IVSize == 32 || IVSize == 64) && 1686 "IV size is not compatible with the omp runtime"); 1687 auto Name = 1688 IVSize == 32 1689 ? (IVSigned ? 
"__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1690 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1691 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1692 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1693 CGM.Int32Ty, // tid 1694 CGM.Int32Ty, // schedtype 1695 ITy, // lower 1696 ITy, // upper 1697 ITy, // stride 1698 ITy // chunk 1699 }; 1700 llvm::FunctionType *FnTy = 1701 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1702 return CGM.CreateRuntimeFunction(FnTy, Name); 1703 } 1704 1705 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, 1706 bool IVSigned) { 1707 assert((IVSize == 32 || IVSize == 64) && 1708 "IV size is not compatible with the omp runtime"); 1709 auto Name = 1710 IVSize == 32 1711 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1712 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1713 llvm::Type *TypeParams[] = { 1714 getIdentTyPointerTy(), // loc 1715 CGM.Int32Ty, // tid 1716 }; 1717 llvm::FunctionType *FnTy = 1718 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1719 return CGM.CreateRuntimeFunction(FnTy, Name); 1720 } 1721 1722 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, 1723 bool IVSigned) { 1724 assert((IVSize == 32 || IVSize == 64) && 1725 "IV size is not compatible with the omp runtime"); 1726 auto Name = 1727 IVSize == 32 1728 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1729 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1730 auto ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1731 auto PtrTy = llvm::PointerType::getUnqual(ITy); 1732 llvm::Type *TypeParams[] = { 1733 getIdentTyPointerTy(), // loc 1734 CGM.Int32Ty, // tid 1735 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1736 PtrTy, // p_lower 1737 PtrTy, // p_upper 1738 PtrTy // p_stride 1739 }; 1740 llvm::FunctionType *FnTy = 1741 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1742 return CGM.CreateRuntimeFunction(FnTy, Name); 1743 } 1744 1745 llvm::Constant * 1746 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1747 assert(!CGM.getLangOpts().OpenMPUseTLS || 1748 !CGM.getContext().getTargetInfo().isTLSSupported()); 1749 // Lookup the entry, lazily creating it if necessary. 1750 return getOrCreateInternalVariable(CGM.Int8PtrPtrTy, 1751 Twine(CGM.getMangledName(VD)) + ".cache."); 1752 } 1753 1754 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1755 const VarDecl *VD, 1756 Address VDAddr, 1757 SourceLocation Loc) { 1758 if (CGM.getLangOpts().OpenMPUseTLS && 1759 CGM.getContext().getTargetInfo().isTLSSupported()) 1760 return VDAddr; 1761 1762 auto VarTy = VDAddr.getElementType(); 1763 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1764 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1765 CGM.Int8PtrTy), 1766 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1767 getOrCreateThreadPrivateCache(VD)}; 1768 return Address(CGF.EmitRuntimeCall( 1769 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 1770 VDAddr.getAlignment()); 1771 } 1772 1773 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1774 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1775 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1776 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1777 // library. 
1778 auto OMPLoc = emitUpdateLocation(CGF, Loc); 1779 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1780 OMPLoc); 1781 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1782 // to register constructor/destructor for variable. 1783 llvm::Value *Args[] = {OMPLoc, 1784 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1785 CGM.VoidPtrTy), 1786 Ctor, CopyCtor, Dtor}; 1787 CGF.EmitRuntimeCall( 1788 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 1789 } 1790 1791 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1792 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1793 bool PerformInit, CodeGenFunction *CGF) { 1794 if (CGM.getLangOpts().OpenMPUseTLS && 1795 CGM.getContext().getTargetInfo().isTLSSupported()) 1796 return nullptr; 1797 1798 VD = VD->getDefinition(CGM.getContext()); 1799 if (VD && ThreadPrivateWithDefinition.count(VD) == 0) { 1800 ThreadPrivateWithDefinition.insert(VD); 1801 QualType ASTTy = VD->getType(); 1802 1803 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1804 auto Init = VD->getAnyInitializer(); 1805 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1806 // Generate function that re-emits the declaration's initializer into the 1807 // threadprivate copy of the variable VD 1808 CodeGenFunction CtorCGF(CGM); 1809 FunctionArgList Args; 1810 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 1811 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 1812 Args.push_back(&Dst); 1813 1814 auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1815 CGM.getContext().VoidPtrTy, Args); 1816 auto FTy = CGM.getTypes().GetFunctionType(FI); 1817 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 1818 FTy, ".__kmpc_global_ctor_.", FI, Loc); 1819 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1820 Args, SourceLocation()); 1821 auto ArgVal = CtorCGF.EmitLoadOfScalar( 1822 CtorCGF.GetAddrOfLocalVar(&Dst), 
/*Volatile=*/false, 1823 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1824 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1825 Arg = CtorCGF.Builder.CreateElementBitCast(Arg, 1826 CtorCGF.ConvertTypeForMem(ASTTy)); 1827 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1828 /*IsInitializer=*/true); 1829 ArgVal = CtorCGF.EmitLoadOfScalar( 1830 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1831 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1832 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1833 CtorCGF.FinishFunction(); 1834 Ctor = Fn; 1835 } 1836 if (VD->getType().isDestructedType() != QualType::DK_none) { 1837 // Generate function that emits destructor call for the threadprivate copy 1838 // of the variable VD 1839 CodeGenFunction DtorCGF(CGM); 1840 FunctionArgList Args; 1841 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 1842 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 1843 Args.push_back(&Dst); 1844 1845 auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1846 CGM.getContext().VoidTy, Args); 1847 auto FTy = CGM.getTypes().GetFunctionType(FI); 1848 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 1849 FTy, ".__kmpc_global_dtor_.", FI, Loc); 1850 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1851 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1852 SourceLocation()); 1853 // Create a scope with an artificial location for the body of this function. 
1854 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1855 auto ArgVal = DtorCGF.EmitLoadOfScalar( 1856 DtorCGF.GetAddrOfLocalVar(&Dst), 1857 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1858 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 1859 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1860 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1861 DtorCGF.FinishFunction(); 1862 Dtor = Fn; 1863 } 1864 // Do not emit init function if it is not required. 1865 if (!Ctor && !Dtor) 1866 return nullptr; 1867 1868 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1869 auto CopyCtorTy = 1870 llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1871 /*isVarArg=*/false)->getPointerTo(); 1872 // Copying constructor for the threadprivate variable. 1873 // Must be NULL - reserved by runtime, but currently it requires that this 1874 // parameter is always NULL. Otherwise it fires assertion. 1875 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1876 if (Ctor == nullptr) { 1877 auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1878 /*isVarArg=*/false)->getPointerTo(); 1879 Ctor = llvm::Constant::getNullValue(CtorTy); 1880 } 1881 if (Dtor == nullptr) { 1882 auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1883 /*isVarArg=*/false)->getPointerTo(); 1884 Dtor = llvm::Constant::getNullValue(DtorTy); 1885 } 1886 if (!CGF) { 1887 auto InitFunctionTy = 1888 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1889 auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( 1890 InitFunctionTy, ".__omp_threadprivate_init_.", 1891 CGM.getTypes().arrangeNullaryFunction()); 1892 CodeGenFunction InitCGF(CGM); 1893 FunctionArgList ArgList; 1894 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1895 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1896 Loc); 1897 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1898 InitCGF.FinishFunction(); 
1899 return InitFunction; 1900 } 1901 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1902 } 1903 return nullptr; 1904 } 1905 1906 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen 1907 /// function. Here is the logic: 1908 /// if (Cond) { 1909 /// ThenGen(); 1910 /// } else { 1911 /// ElseGen(); 1912 /// } 1913 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 1914 const RegionCodeGenTy &ThenGen, 1915 const RegionCodeGenTy &ElseGen) { 1916 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 1917 1918 // If the condition constant folds and can be elided, try to avoid emitting 1919 // the condition and the dead arm of the if/else. 1920 bool CondConstant; 1921 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 1922 if (CondConstant) 1923 ThenGen(CGF); 1924 else 1925 ElseGen(CGF); 1926 return; 1927 } 1928 1929 // Otherwise, the condition did not fold, or we couldn't elide it. Just 1930 // emit the conditional branch. 1931 auto ThenBlock = CGF.createBasicBlock("omp_if.then"); 1932 auto ElseBlock = CGF.createBasicBlock("omp_if.else"); 1933 auto ContBlock = CGF.createBasicBlock("omp_if.end"); 1934 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 1935 1936 // Emit the 'then' code. 1937 CGF.EmitBlock(ThenBlock); 1938 ThenGen(CGF); 1939 CGF.EmitBranch(ContBlock); 1940 // Emit the 'else' code if present. 1941 // There is no need to emit line number for unconditional branch. 1942 (void)ApplyDebugLocation::CreateEmpty(CGF); 1943 CGF.EmitBlock(ElseBlock); 1944 ElseGen(CGF); 1945 // There is no need to emit line number for unconditional branch. 1946 (void)ApplyDebugLocation::CreateEmpty(CGF); 1947 CGF.EmitBranch(ContBlock); 1948 // Emit the continuation block for code after the if. 
1949 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 1950 } 1951 1952 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 1953 llvm::Value *OutlinedFn, 1954 ArrayRef<llvm::Value *> CapturedVars, 1955 const Expr *IfCond) { 1956 if (!CGF.HaveInsertPoint()) 1957 return; 1958 auto *RTLoc = emitUpdateLocation(CGF, Loc); 1959 auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, 1960 PrePostActionTy &) { 1961 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 1962 auto &RT = CGF.CGM.getOpenMPRuntime(); 1963 llvm::Value *Args[] = { 1964 RTLoc, 1965 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 1966 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 1967 llvm::SmallVector<llvm::Value *, 16> RealArgs; 1968 RealArgs.append(std::begin(Args), std::end(Args)); 1969 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 1970 1971 auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); 1972 CGF.EmitRuntimeCall(RTLFn, RealArgs); 1973 }; 1974 auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, 1975 PrePostActionTy &) { 1976 auto &RT = CGF.CGM.getOpenMPRuntime(); 1977 auto ThreadID = RT.getThreadID(CGF, Loc); 1978 // Build calls: 1979 // __kmpc_serialized_parallel(&Loc, GTid); 1980 llvm::Value *Args[] = {RTLoc, ThreadID}; 1981 CGF.EmitRuntimeCall( 1982 RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); 1983 1984 // OutlinedFn(>id, &zero, CapturedStruct); 1985 auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 1986 Address ZeroAddr = 1987 CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), 1988 /*Name*/ ".zero.addr"); 1989 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 1990 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 1991 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 1992 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 1993 OutlinedFnArgs.append(CapturedVars.begin(), 
CapturedVars.end()); 1994 CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); 1995 1996 // __kmpc_end_serialized_parallel(&Loc, GTid); 1997 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 1998 CGF.EmitRuntimeCall( 1999 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 2000 EndArgs); 2001 }; 2002 if (IfCond) 2003 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 2004 else { 2005 RegionCodeGenTy ThenRCG(ThenGen); 2006 ThenRCG(CGF); 2007 } 2008 } 2009 2010 // If we're inside an (outlined) parallel region, use the region info's 2011 // thread-ID variable (it is passed in a first argument of the outlined function 2012 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2013 // regular serial code region, get thread ID by calling kmp_int32 2014 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2015 // return the address of that temp. 2016 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2017 SourceLocation Loc) { 2018 if (auto *OMPRegionInfo = 2019 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2020 if (OMPRegionInfo->getThreadIDVariable()) 2021 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 2022 2023 auto ThreadID = getThreadID(CGF, Loc); 2024 auto Int32Ty = 2025 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2026 auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2027 CGF.EmitStoreOfScalar(ThreadID, 2028 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2029 2030 return ThreadIDTemp; 2031 } 2032 2033 llvm::Constant * 2034 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, 2035 const llvm::Twine &Name) { 2036 SmallString<256> Buffer; 2037 llvm::raw_svector_ostream Out(Buffer); 2038 Out << Name; 2039 auto RuntimeName = Out.str(); 2040 auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first; 2041 if (Elem.second) { 2042 
assert(Elem.second->getType()->getPointerElementType() == Ty && 2043 "OMP internal variable has different type than requested"); 2044 return &*Elem.second; 2045 } 2046 2047 return Elem.second = new llvm::GlobalVariable( 2048 CGM.getModule(), Ty, /*IsConstant*/ false, 2049 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2050 Elem.first()); 2051 } 2052 2053 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2054 llvm::Twine Name(".gomp_critical_user_", CriticalName); 2055 return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); 2056 } 2057 2058 namespace { 2059 /// Common pre(post)-action for different OpenMP constructs. 2060 class CommonActionTy final : public PrePostActionTy { 2061 llvm::Value *EnterCallee; 2062 ArrayRef<llvm::Value *> EnterArgs; 2063 llvm::Value *ExitCallee; 2064 ArrayRef<llvm::Value *> ExitArgs; 2065 bool Conditional; 2066 llvm::BasicBlock *ContBlock = nullptr; 2067 2068 public: 2069 CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs, 2070 llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs, 2071 bool Conditional = false) 2072 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2073 ExitArgs(ExitArgs), Conditional(Conditional) {} 2074 void Enter(CodeGenFunction &CGF) override { 2075 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2076 if (Conditional) { 2077 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2078 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2079 ContBlock = CGF.createBasicBlock("omp_if.end"); 2080 // Generate the branch (If-stmt) 2081 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2082 CGF.EmitBlock(ThenBlock); 2083 } 2084 } 2085 void Done(CodeGenFunction &CGF) { 2086 // Emit the rest of blocks/branches 2087 CGF.EmitBranch(ContBlock); 2088 CGF.EmitBlock(ContBlock, true); 2089 } 2090 void Exit(CodeGenFunction &CGF) override { 2091 CGF.EmitRuntimeCall(ExitCallee, 
ExitArgs); 2092 } 2093 }; 2094 } // anonymous namespace 2095 2096 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2097 StringRef CriticalName, 2098 const RegionCodeGenTy &CriticalOpGen, 2099 SourceLocation Loc, const Expr *Hint) { 2100 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2101 // CriticalOpGen(); 2102 // __kmpc_end_critical(ident_t *, gtid, Lock); 2103 // Prepare arguments and build a call to __kmpc_critical 2104 if (!CGF.HaveInsertPoint()) 2105 return; 2106 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2107 getCriticalRegionLock(CriticalName)}; 2108 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2109 std::end(Args)); 2110 if (Hint) { 2111 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2112 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); 2113 } 2114 CommonActionTy Action( 2115 createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint 2116 : OMPRTL__kmpc_critical), 2117 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); 2118 CriticalOpGen.setAction(Action); 2119 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2120 } 2121 2122 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2123 const RegionCodeGenTy &MasterOpGen, 2124 SourceLocation Loc) { 2125 if (!CGF.HaveInsertPoint()) 2126 return; 2127 // if(__kmpc_master(ident_t *, gtid)) { 2128 // MasterOpGen(); 2129 // __kmpc_end_master(ident_t *, gtid); 2130 // } 2131 // Prepare arguments and build a call to __kmpc_master 2132 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2133 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, 2134 createRuntimeFunction(OMPRTL__kmpc_end_master), Args, 2135 /*Conditional=*/true); 2136 MasterOpGen.setAction(Action); 2137 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2138 Action.Done(CGF); 2139 } 2140 2141 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2142 SourceLocation Loc) { 
2143 if (!CGF.HaveInsertPoint()) 2144 return; 2145 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2146 llvm::Value *Args[] = { 2147 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2148 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2149 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 2150 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2151 Region->emitUntiedSwitch(CGF); 2152 } 2153 2154 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2155 const RegionCodeGenTy &TaskgroupOpGen, 2156 SourceLocation Loc) { 2157 if (!CGF.HaveInsertPoint()) 2158 return; 2159 // __kmpc_taskgroup(ident_t *, gtid); 2160 // TaskgroupOpGen(); 2161 // __kmpc_end_taskgroup(ident_t *, gtid); 2162 // Prepare arguments and build a call to __kmpc_taskgroup 2163 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2164 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, 2165 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 2166 Args); 2167 TaskgroupOpGen.setAction(Action); 2168 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2169 } 2170 2171 /// Given an array of pointers to variables, project the address of a 2172 /// given variable. 2173 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2174 unsigned Index, const VarDecl *Var) { 2175 // Pull out the pointer to the variable. 
2176 Address PtrAddr = 2177 CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize()); 2178 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2179 2180 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 2181 Addr = CGF.Builder.CreateElementBitCast( 2182 Addr, CGF.ConvertTypeForMem(Var->getType())); 2183 return Addr; 2184 } 2185 2186 static llvm::Value *emitCopyprivateCopyFunction( 2187 CodeGenModule &CGM, llvm::Type *ArgsType, 2188 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2189 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) { 2190 auto &C = CGM.getContext(); 2191 // void copy_func(void *LHSArg, void *RHSArg); 2192 FunctionArgList Args; 2193 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 2194 C.VoidPtrTy); 2195 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 2196 C.VoidPtrTy); 2197 Args.push_back(&LHSArg); 2198 Args.push_back(&RHSArg); 2199 auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2200 auto *Fn = llvm::Function::Create( 2201 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 2202 ".omp.copyprivate.copy_func", &CGM.getModule()); 2203 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); 2204 CodeGenFunction CGF(CGM); 2205 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 2206 // Dest = (void*[n])(LHSArg); 2207 // Src = (void*[n])(RHSArg); 2208 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2209 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2210 ArgsType), CGF.getPointerAlign()); 2211 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2212 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2213 ArgsType), CGF.getPointerAlign()); 2214 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 2215 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2216 // ... 
/// \brief Emit a 'single' region, followed by the copyprivate broadcast when
/// \a CopyprivateVars is not empty.
///
/// Nothing is emitted when \a CGF has no insertion point.
/// \param CGF Function in which the region is emitted.
/// \param SingleOpGen Generator for the body of the region.
/// \param Loc Source location used for the ident_t arguments and thread id.
/// \param CopyprivateVars Variables listed for copyprivate; may be empty.
/// \param SrcExprs Forwarded to emitCopyprivateCopyFunction (see note below).
/// \param DstExprs Forwarded to emitCopyprivateCopyFunction (see note below).
/// \param AssignmentOps Per-variable copy operations, forwarded to
/// emitCopyprivateCopyFunction.
/// All four arrays must have the same length (asserted).
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  auto &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid when there is nothing to broadcast; its validity is
  // what gates every copyprivate-related step below.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Emitted before Action.Done(), i.e. still inside the conditional block
    // opened by the action's Enter().
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional block and continue in the join block.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    // void *cpr_list[<number of copyprivate variables>];
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    auto CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      // cpr_list[I] = (void *)&<I-th copyprivate variable>;
      Address Elem = CGF.Builder.CreateConstArrayGEP(
          CopyprivateList, I, CGF.getPointerSize());
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): emitCopyprivateCopyFunction declares its parameters as
    // (..., CopyprivateVars, DestExprs, SrcExprs, AssignmentOps), so this
    // function's SrcExprs lands in the callee's DestExprs slot and DstExprs
    // in its SrcExprs slot. Either the parameter names here are swapped or
    // the call is; confirm against the declaration in the header and the
    // call sites before touching the argument order.
    auto *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
    auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}
OMP_IDENT_BARRIER_IMPL_FOR; 2345 else if (Kind == OMPD_sections) 2346 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2347 else if (Kind == OMPD_single) 2348 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2349 else if (Kind == OMPD_barrier) 2350 Flags = OMP_IDENT_BARRIER_EXPL; 2351 else 2352 Flags = OMP_IDENT_BARRIER_IMPL; 2353 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2354 // thread_id); 2355 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2356 getThreadID(CGF, Loc)}; 2357 if (auto *OMPRegionInfo = 2358 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 2359 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2360 auto *Result = CGF.EmitRuntimeCall( 2361 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 2362 if (EmitChecks) { 2363 // if (__kmpc_cancel_barrier()) { 2364 // exit from construct; 2365 // } 2366 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2367 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 2368 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 2369 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2370 CGF.EmitBlock(ExitBB); 2371 // exit from construct; 2372 auto CancelDestination = 2373 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2374 CGF.EmitBranchThroughCleanup(CancelDestination); 2375 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2376 } 2377 return; 2378 } 2379 } 2380 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 2381 } 2382 2383 /// \brief Map the OpenMP loop schedule to the runtime enumeration. 2384 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2385 bool Chunked, bool Ordered) { 2386 switch (ScheduleKind) { 2387 case OMPC_SCHEDULE_static: 2388 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2389 : (Ordered ? OMP_ord_static : OMP_sch_static); 2390 case OMPC_SCHEDULE_dynamic: 2391 return Ordered ? 
OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2392 case OMPC_SCHEDULE_guided: 2393 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2394 case OMPC_SCHEDULE_runtime: 2395 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2396 case OMPC_SCHEDULE_auto: 2397 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2398 case OMPC_SCHEDULE_unknown: 2399 assert(!Chunked && "chunk was specified but schedule kind not known"); 2400 return Ordered ? OMP_ord_static : OMP_sch_static; 2401 } 2402 llvm_unreachable("Unexpected runtime schedule"); 2403 } 2404 2405 /// \brief Map the OpenMP distribute schedule to the runtime enumeration. 2406 static OpenMPSchedType 2407 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2408 // only static is allowed for dist_schedule 2409 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2410 } 2411 2412 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2413 bool Chunked) const { 2414 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2415 return Schedule == OMP_sch_static; 2416 } 2417 2418 bool CGOpenMPRuntime::isStaticNonchunked( 2419 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2420 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2421 return Schedule == OMP_dist_sch_static; 2422 } 2423 2424 2425 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2426 auto Schedule = 2427 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2428 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2429 return Schedule != OMP_sch_static; 2430 } 2431 2432 static int addMonoNonMonoModifier(OpenMPSchedType Schedule, 2433 OpenMPScheduleClauseModifier M1, 2434 OpenMPScheduleClauseModifier M2) { 2435 int Modifier = 0; 2436 switch (M1) { 2437 case OMPC_SCHEDULE_MODIFIER_monotonic: 2438 Modifier = OMP_sch_modifier_monotonic; 2439 break; 2440 case 
OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2441 Modifier = OMP_sch_modifier_nonmonotonic; 2442 break; 2443 case OMPC_SCHEDULE_MODIFIER_simd: 2444 if (Schedule == OMP_sch_static_chunked) 2445 Schedule = OMP_sch_static_balanced_chunked; 2446 break; 2447 case OMPC_SCHEDULE_MODIFIER_last: 2448 case OMPC_SCHEDULE_MODIFIER_unknown: 2449 break; 2450 } 2451 switch (M2) { 2452 case OMPC_SCHEDULE_MODIFIER_monotonic: 2453 Modifier = OMP_sch_modifier_monotonic; 2454 break; 2455 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2456 Modifier = OMP_sch_modifier_nonmonotonic; 2457 break; 2458 case OMPC_SCHEDULE_MODIFIER_simd: 2459 if (Schedule == OMP_sch_static_chunked) 2460 Schedule = OMP_sch_static_balanced_chunked; 2461 break; 2462 case OMPC_SCHEDULE_MODIFIER_last: 2463 case OMPC_SCHEDULE_MODIFIER_unknown: 2464 break; 2465 } 2466 return Schedule | Modifier; 2467 } 2468 2469 void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF, 2470 SourceLocation Loc, 2471 const OpenMPScheduleTy &ScheduleKind, 2472 unsigned IVSize, bool IVSigned, 2473 bool Ordered, llvm::Value *UB, 2474 llvm::Value *Chunk) { 2475 if (!CGF.HaveInsertPoint()) 2476 return; 2477 OpenMPSchedType Schedule = 2478 getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); 2479 assert(Ordered || 2480 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2481 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2482 Schedule != OMP_sch_static_balanced_chunked)); 2483 // Call __kmpc_dispatch_init( 2484 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2485 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2486 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2487 2488 // If the Chunk was not specified in the clause - use default value 1. 
2489 if (Chunk == nullptr) 2490 Chunk = CGF.Builder.getIntN(IVSize, 1); 2491 llvm::Value *Args[] = { 2492 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2493 CGF.Builder.getInt32(addMonoNonMonoModifier( 2494 Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2495 CGF.Builder.getIntN(IVSize, 0), // Lower 2496 UB, // Upper 2497 CGF.Builder.getIntN(IVSize, 1), // Stride 2498 Chunk // Chunk 2499 }; 2500 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2501 } 2502 2503 static void emitForStaticInitCall( 2504 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2505 llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule, 2506 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2507 unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB, 2508 Address ST, llvm::Value *Chunk) { 2509 if (!CGF.HaveInsertPoint()) 2510 return; 2511 2512 assert(!Ordered); 2513 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2514 Schedule == OMP_sch_static_balanced_chunked || 2515 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2516 Schedule == OMP_dist_sch_static || 2517 Schedule == OMP_dist_sch_static_chunked); 2518 2519 // Call __kmpc_for_static_init( 2520 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2521 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2522 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2523 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2524 if (Chunk == nullptr) { 2525 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2526 Schedule == OMP_dist_sch_static) && 2527 "expected static non-chunked schedule"); 2528 // If the Chunk was not specified in the clause - use default value 1. 
2529 Chunk = CGF.Builder.getIntN(IVSize, 1); 2530 } else { 2531 assert((Schedule == OMP_sch_static_chunked || 2532 Schedule == OMP_sch_static_balanced_chunked || 2533 Schedule == OMP_ord_static_chunked || 2534 Schedule == OMP_dist_sch_static_chunked) && 2535 "expected static chunked schedule"); 2536 } 2537 llvm::Value *Args[] = { 2538 UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier( 2539 Schedule, M1, M2)), // Schedule type 2540 IL.getPointer(), // &isLastIter 2541 LB.getPointer(), // &LB 2542 UB.getPointer(), // &UB 2543 ST.getPointer(), // &Stride 2544 CGF.Builder.getIntN(IVSize, 1), // Incr 2545 Chunk // Chunk 2546 }; 2547 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2548 } 2549 2550 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2551 SourceLocation Loc, 2552 const OpenMPScheduleTy &ScheduleKind, 2553 unsigned IVSize, bool IVSigned, 2554 bool Ordered, Address IL, Address LB, 2555 Address UB, Address ST, 2556 llvm::Value *Chunk) { 2557 OpenMPSchedType ScheduleNum = 2558 getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); 2559 auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); 2560 auto *ThreadId = getThreadID(CGF, Loc); 2561 auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); 2562 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2563 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize, 2564 Ordered, IL, LB, UB, ST, Chunk); 2565 } 2566 2567 void CGOpenMPRuntime::emitDistributeStaticInit( 2568 CodeGenFunction &CGF, SourceLocation Loc, 2569 OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned, 2570 bool Ordered, Address IL, Address LB, Address UB, Address ST, 2571 llvm::Value *Chunk) { 2572 OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr); 2573 auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); 2574 auto *ThreadId = getThreadID(CGF, Loc); 2575 auto *StaticInitFunction = createForStaticInitFunction(IVSize, 
IVSigned); 2576 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2577 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2578 OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB, 2579 UB, ST, Chunk); 2580 } 2581 2582 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2583 SourceLocation Loc) { 2584 if (!CGF.HaveInsertPoint()) 2585 return; 2586 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2587 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2588 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 2589 Args); 2590 } 2591 2592 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2593 SourceLocation Loc, 2594 unsigned IVSize, 2595 bool IVSigned) { 2596 if (!CGF.HaveInsertPoint()) 2597 return; 2598 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2599 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2600 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2601 } 2602 2603 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2604 SourceLocation Loc, unsigned IVSize, 2605 bool IVSigned, Address IL, 2606 Address LB, Address UB, 2607 Address ST) { 2608 // Call __kmpc_dispatch_next( 2609 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2610 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2611 // kmp_int[32|64] *p_stride); 2612 llvm::Value *Args[] = { 2613 emitUpdateLocation(CGF, Loc), 2614 getThreadID(CGF, Loc), 2615 IL.getPointer(), // &isLastIter 2616 LB.getPointer(), // &Lower 2617 UB.getPointer(), // &Upper 2618 ST.getPointer() // &Stride 2619 }; 2620 llvm::Value *Call = 2621 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2622 return CGF.EmitScalarConversion( 2623 Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true), 2624 CGF.getContext().BoolTy, Loc); 2625 } 2626 2627 void 
CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2628 llvm::Value *NumThreads, 2629 SourceLocation Loc) { 2630 if (!CGF.HaveInsertPoint()) 2631 return; 2632 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2633 llvm::Value *Args[] = { 2634 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2635 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 2636 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 2637 Args); 2638 } 2639 2640 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2641 OpenMPProcBindClauseKind ProcBind, 2642 SourceLocation Loc) { 2643 if (!CGF.HaveInsertPoint()) 2644 return; 2645 // Constants for proc bind value accepted by the runtime. 2646 enum ProcBindTy { 2647 ProcBindFalse = 0, 2648 ProcBindTrue, 2649 ProcBindMaster, 2650 ProcBindClose, 2651 ProcBindSpread, 2652 ProcBindIntel, 2653 ProcBindDefault 2654 } RuntimeProcBind; 2655 switch (ProcBind) { 2656 case OMPC_PROC_BIND_master: 2657 RuntimeProcBind = ProcBindMaster; 2658 break; 2659 case OMPC_PROC_BIND_close: 2660 RuntimeProcBind = ProcBindClose; 2661 break; 2662 case OMPC_PROC_BIND_spread: 2663 RuntimeProcBind = ProcBindSpread; 2664 break; 2665 case OMPC_PROC_BIND_unknown: 2666 llvm_unreachable("Unsupported proc_bind value."); 2667 } 2668 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2669 llvm::Value *Args[] = { 2670 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2671 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 2672 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 2673 } 2674 2675 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2676 SourceLocation Loc) { 2677 if (!CGF.HaveInsertPoint()) 2678 return; 2679 // Build call void __kmpc_flush(ident_t *loc) 2680 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 2681 emitUpdateLocation(CGF, Loc)); 2682 } 2683 2684 namespace { 2685 /// \brief 
Indexes of fields for type kmp_task_t. 2686 enum KmpTaskTFields { 2687 /// \brief List of shared variables. 2688 KmpTaskTShareds, 2689 /// \brief Task routine. 2690 KmpTaskTRoutine, 2691 /// \brief Partition id for the untied tasks. 2692 KmpTaskTPartId, 2693 /// Function with call of destructors for private variables. 2694 Data1, 2695 /// Task priority. 2696 Data2, 2697 /// (Taskloops only) Lower bound. 2698 KmpTaskTLowerBound, 2699 /// (Taskloops only) Upper bound. 2700 KmpTaskTUpperBound, 2701 /// (Taskloops only) Stride. 2702 KmpTaskTStride, 2703 /// (Taskloops only) Is last iteration flag. 2704 KmpTaskTLastIter, 2705 }; 2706 } // anonymous namespace 2707 2708 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2709 // FIXME: Add other entries type when they become supported. 2710 return OffloadEntriesTargetRegion.empty(); 2711 } 2712 2713 /// \brief Initialize target region entry. 2714 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2715 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2716 StringRef ParentName, unsigned LineNum, 2717 unsigned Order) { 2718 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2719 "only required for the device " 2720 "code generation."); 2721 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2722 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 2723 /*Flags=*/0); 2724 ++OffloadingEntriesNum; 2725 } 2726 2727 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2728 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2729 StringRef ParentName, unsigned LineNum, 2730 llvm::Constant *Addr, llvm::Constant *ID, 2731 int32_t Flags) { 2732 // If we are emitting code for a target, the entry is already initialized, 2733 // only has to be registered. 
2734 if (CGM.getLangOpts().OpenMPIsDevice) { 2735 assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 2736 "Entry must exist."); 2737 auto &Entry = 2738 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 2739 assert(Entry.isValid() && "Entry not initialized!"); 2740 Entry.setAddress(Addr); 2741 Entry.setID(ID); 2742 Entry.setFlags(Flags); 2743 return; 2744 } else { 2745 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID, Flags); 2746 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 2747 } 2748 } 2749 2750 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 2751 unsigned DeviceID, unsigned FileID, StringRef ParentName, 2752 unsigned LineNum) const { 2753 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 2754 if (PerDevice == OffloadEntriesTargetRegion.end()) 2755 return false; 2756 auto PerFile = PerDevice->second.find(FileID); 2757 if (PerFile == PerDevice->second.end()) 2758 return false; 2759 auto PerParentName = PerFile->second.find(ParentName); 2760 if (PerParentName == PerFile->second.end()) 2761 return false; 2762 auto PerLine = PerParentName->second.find(LineNum); 2763 if (PerLine == PerParentName->second.end()) 2764 return false; 2765 // Fail if this entry is already registered. 2766 if (PerLine->second.getAddress() || PerLine->second.getID()) 2767 return false; 2768 return true; 2769 } 2770 2771 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 2772 const OffloadTargetRegionEntryInfoActTy &Action) { 2773 // Scan all target region entries and perform the provided action. 2774 for (auto &D : OffloadEntriesTargetRegion) 2775 for (auto &F : D.second) 2776 for (auto &P : F.second) 2777 for (auto &L : P.second) 2778 Action(D.first, F.first, P.first(), L.first, L.second); 2779 } 2780 2781 /// \brief Create a Ctor/Dtor-like function whose body is emitted through 2782 /// \a Codegen. 
/// This is used to emit the two functions that register and
/// unregister the descriptor of the current compilation unit.
///
/// The created function returns void and takes a single unnamed void*
/// parameter.
/// \return The newly created function.
static llvm::Function *
createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
                                         const RegionCodeGenTy &Codegen) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(),
                             /*Id=*/nullptr, C.VoidPtrTy);
  Args.push_back(&DummyPtr);

  CodeGenFunction CGF(CGM);
  auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto FTy = CGM.getTypes().GetFunctionType(FI);
  auto *Fn =
      CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
  Codegen(CGF);
  CGF.FinishFunction();
  return Fn;
}

/// \brief Build the offloading binary descriptor and the function that
/// registers it with the offloading runtime.
///
/// \return The registration function, or nullptr when compiling for the
/// device or when there are no offload entries.
llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {

  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  auto &M = CGM.getModule();
  auto &C = CGM.getContext();

  // Get list of devices we care about
  auto &Devices = CGM.getLangOpts().OMPTargetTriples;

  // We should be creating an offloading descriptor only if there are devices
  // specified.
  assert(!Devices.empty() && "No OpenMP offloading devices??");

  // Create the external variables that will point to the begin and end of the
  // host entries section. These will be defined by the linker.
  auto *OffloadEntryTy =
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
  llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      ".omp_offloading.entries_begin");
  llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      ".omp_offloading.entries_end");

  // Create all device images
  auto *DeviceImageTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
  ConstantInitBuilder DeviceImagesBuilder(CGM);
  auto DeviceImagesEntries = DeviceImagesBuilder.beginArray(DeviceImageTy);

  // One image record per target triple; each refers to external start/end
  // symbols whose names embed the triple, plus the shared host entry range.
  for (unsigned i = 0; i < Devices.size(); ++i) {
    StringRef T = Devices[i].getTriple();
    auto *ImgBegin = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
        /*Initializer=*/nullptr,
        Twine(".omp_offloading.img_start.") + Twine(T));
    auto *ImgEnd = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
        /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T));

    auto Dev = DeviceImagesEntries.beginStruct(DeviceImageTy);
    Dev.add(ImgBegin);
    Dev.add(ImgEnd);
    Dev.add(HostEntriesBegin);
    Dev.add(HostEntriesEnd);
    Dev.finishAndAddTo(DeviceImagesEntries);
  }

  // Create device images global array.
  llvm::GlobalVariable *DeviceImages =
      DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images",
                                                CGM.getPointerAlign(),
                                                /*isConstant=*/true);
  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // This is a zero index array used to build the constant GEP expression
  // below.
  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
                             llvm::Constant::getNullValue(CGM.Int32Ty)};

  // Create the target region descriptor.
  auto *BinaryDescriptorTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
  ConstantInitBuilder DescBuilder(CGM);
  auto DescInit = DescBuilder.beginStruct(BinaryDescriptorTy);
  DescInit.addInt(CGM.Int32Ty, Devices.size());
  DescInit.add(llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
                                                    DeviceImages,
                                                    Index));
  DescInit.add(HostEntriesBegin);
  DescInit.add(HostEntriesEnd);

  auto *Desc = DescInit.finishAndCreateGlobal(".omp_offloading.descriptor",
                                              CGM.getPointerAlign(),
                                              /*isConstant=*/true);

  // Emit code to register or unregister the descriptor at execution
  // startup or closing, respectively.

  // Create a variable to drive the registration and unregistration of the
  // descriptor, so we can reuse the logic that emits Ctors and Dtors.
  auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
  ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
                                IdentInfo, C.CharTy);

  // The unregistration function is created first because the registration
  // function's body refers to it when registering the global destructor.
  auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
      CGM, ".omp_offloading.descriptor_unreg",
      [&](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
                             Desc);
      });
  auto *RegFn = createOffloadingBinaryDescriptorFunction(
      CGM, ".omp_offloading.descriptor_reg",
      [&](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
                             Desc);
        CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
      });
  return RegFn;
}

/// \brief Emit one __tgt_offload_entry global into the
/// ".omp_offloading.entries" section.
///
/// The entry's address field is taken from \a ID, while the entry's name
/// string is taken from the name of \a Addr.
void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID,
                                         llvm::Constant *Addr, uint64_t Size,
                                         int32_t Flags) {
  StringRef Name = Addr->getName();
  auto *TgtOffloadEntryType = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
  llvm::LLVMContext &C = CGM.getModule().getContext();
  llvm::Module &M = CGM.getModule();

  // Make sure the address has the right type.
  llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  llvm::GlobalVariable *Str =
      new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
                               llvm::GlobalValue::InternalLinkage, StrPtrInit,
                               ".omp_offloading.entry_name");
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
  llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);

  // We can't have any padding between symbols, so we need to have 1-byte
  // alignment.
  auto Align = CharUnits::fromQuantity(1);

  // Create the entry struct. Fields are added in the order documented in
  // getTgtOffloadEntryQTy(): addr, name, size, flags, reserved.
  ConstantInitBuilder EntryBuilder(CGM);
  auto EntryInit = EntryBuilder.beginStruct(TgtOffloadEntryType);
  EntryInit.add(AddrPtr);
  EntryInit.add(StrPtr);
  EntryInit.addInt(CGM.SizeTy, Size);
  EntryInit.addInt(CGM.Int32Ty, Flags);
  EntryInit.addInt(CGM.Int32Ty, 0);
  llvm::GlobalVariable *Entry =
      EntryInit.finishAndCreateGlobal(".omp_offloading.entry",
                                      Align,
                                      /*constant*/ true,
                                      llvm::GlobalValue::ExternalLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection(".omp_offloading.entries");
}

/// \brief Emit the offload entries and the "omp_offload.info" named metadata
/// describing them.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we do not have entries, we don't need to do anything.
  if (OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by their creation order; filled in by the emitter below.
  SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
      OrderedEntries(OffloadEntriesInfoManager.size());

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Auxiliary methods to create metadata values and strings.
  auto getMDInt = [&](unsigned v) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
  };

  auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter = [&](
      unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
      OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
    llvm::SmallVector<llvm::Metadata *, 32> Ops;
    // Generate metadata for target regions. Each entry of this metadata
    // contains:
    // - Entry 0 -> Kind of this type of metadata (0).
    // - Entry 1 -> Device ID of the file where the entry was identified.
    // - Entry 2 -> File ID of the file where the entry was identified.
    // - Entry 3 -> Mangled name of the function where the entry was identified.
    // - Entry 4 -> Line in the file where the entry was identified.
    // - Entry 5 -> Order the entry was created.
    // The first element of the metadata node is the kind.
    Ops.push_back(getMDInt(E.getKind()));
    Ops.push_back(getMDInt(DeviceID));
    Ops.push_back(getMDInt(FileID));
    Ops.push_back(getMDString(ParentName));
    Ops.push_back(getMDInt(Line));
    Ops.push_back(getMDInt(E.getOrder()));

    // Save this entry in the right position of the ordered entries array.
    OrderedEntries[E.getOrder()] = &E;

    // Add metadata to the named metadata node.
    MD->addOperand(llvm::MDNode::get(C, Ops));
  };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Emit the entry globals in creation order.
  for (auto *E : OrderedEntries) {
    assert(E && "All ordered entries must exist!");
    if (auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                E)) {
      assert(CE->getID() && CE->getAddress() &&
             "Entry ID and Addr are invalid!");
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0);
    } else
      llvm_unreachable("Unsupported entry kind.");
  }
}

/// \brief Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  // A host IR file that cannot be read is silently ignored.
  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (Buf.getError())
    return;

  // The host module is parsed into its own, function-local LLVM context.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (ME.getError())
    return;

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (auto I : MD->operands()) {
    llvm::MDNode *MN = cast<llvm::MDNode>(I);

    // Read operand Idx of the current node as an unsigned integer.
    auto getMDInt = [&](unsigned Idx) {
      llvm::ConstantAsMetadata *V =
          cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    // Read operand Idx of the current node as a string.
    auto getMDString = [&](unsigned Idx) {
      llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operands follow the layout
    // written by createOffloadEntriesAndInfoMetadata().
    switch (getMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OFFLOAD_ENTRY_INFO_TARGET_REGION:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
          /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
          /*Order=*/getMDInt(5));
      break;
    }
  }
}

/// \brief Lazily build the kmp_routine_entry_t pointer type, both as a
/// QualType and as an LLVM type; does nothing once it has been created.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    auto &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

/// \brief Append an unnamed, public, non-mutable field of type \a FieldTy to
/// the declaration context \a DC.
/// \return The created field declaration.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

/// \brief Return the (lazily created and cached) type of
/// struct __tgt_offload_entry.
QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {

  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void      *addr;       // Pointer to the offload entry info.
  //                          // (function or global)
  //   char      *name;       // Name of the function or global.
  //   size_t     size;       // Size of the entry info (0 if it is a function).
  //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
  //   int32_t    reserved;   // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

/// \brief Return the (lazily created and cached) type of
/// struct __tgt_device_image.
QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
  // These are the types we need to build:
  // struct __tgt_device_image{
  // void   *ImageStart;       // Pointer to the target code start.
  // void   *ImageEnd;         // Pointer to the target code end.
  // // We also add the host entries to the device image, as it may be useful
  // // for the target runtime to have access to that information.
  // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
  //                                       // the entries.
  // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
  //                                       // entries (non inclusive).
  // };
  if (TgtDeviceImageQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    auto *RD = C.buildImplicitRecord("__tgt_device_image");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtDeviceImageQTy = C.getRecordType(RD);
  }
  return TgtDeviceImageQTy;
}

/// \brief Return the (lazily created and cached) type of
/// struct __tgt_bin_desc.
QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
  // struct __tgt_bin_desc{
  //   int32_t              NumDevices;      // Number of devices supported.
  //   __tgt_device_image   *DeviceImages;   // Arrays of device images
  //                                         // (one per device).
  //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
  //                                         // entries.
  //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
  //                                         // entries (non inclusive).
  // };
  if (TgtBinaryDescriptorQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
    RD->startDefinition();
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtBinaryDescriptorQTy = C.getRecordType(RD);
  }
  return TgtBinaryDescriptorQTy;
}

namespace {
/// Bundles the three declarations tracked for one private variable: the
/// original variable, its private copy, and the per-element initializer.
struct PrivateHelpersTy {
  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
                   const VarDecl *PrivateElemInit)
      : Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  const VarDecl *Original;
  const VarDecl *PrivateCopy;
  const VarDecl *PrivateElemInit;
};
/// Alignment paired with the helpers of one private variable.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

/// \brief Build the implicit record that holds one field per private
/// variable.
///
/// Each field has the non-reference type of the original variable and
/// inherits its 'aligned' attributes.
/// \return The record, or nullptr when \a Privates is empty.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    auto &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    auto *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (auto &&Pair : Privates) {
      auto *VD = Pair.second.Original;
      auto Type = VD->getType();
      Type = Type.getNonReferenceType();
      auto *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        // Copy only the AlignedAttr attributes onto the new field.
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  auto &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //       };
  auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  auto *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64,
/*Signed=*/1); 3263 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3264 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3265 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3266 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3267 } 3268 RD->completeDefinition(); 3269 return RD; 3270 } 3271 3272 static RecordDecl * 3273 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3274 ArrayRef<PrivateDataTy> Privates) { 3275 auto &C = CGM.getContext(); 3276 // Build struct kmp_task_t_with_privates { 3277 // kmp_task_t task_data; 3278 // .kmp_privates_t. privates; 3279 // }; 3280 auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3281 RD->startDefinition(); 3282 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3283 if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) { 3284 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3285 } 3286 RD->completeDefinition(); 3287 return RD; 3288 } 3289 3290 /// \brief Emit a proxy function which accepts kmp_task_t as the second 3291 /// argument. 
3292 /// \code 3293 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3294 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3295 /// For taskloops: 3296 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3297 /// tt->shareds); 3298 /// return 0; 3299 /// } 3300 /// \endcode 3301 static llvm::Value * 3302 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3303 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3304 QualType KmpTaskTWithPrivatesPtrQTy, 3305 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3306 QualType SharedsPtrTy, llvm::Value *TaskFunction, 3307 llvm::Value *TaskPrivatesMap) { 3308 auto &C = CGM.getContext(); 3309 FunctionArgList Args; 3310 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); 3311 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, 3312 /*Id=*/nullptr, 3313 KmpTaskTWithPrivatesPtrQTy.withRestrict()); 3314 Args.push_back(&GtidArg); 3315 Args.push_back(&TaskTypeArg); 3316 auto &TaskEntryFnInfo = 3317 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3318 auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3319 auto *TaskEntry = 3320 llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage, 3321 ".omp_task_entry.", &CGM.getModule()); 3322 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo); 3323 CodeGenFunction CGF(CGM); 3324 CGF.disableDebugInfo(); 3325 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); 3326 3327 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3328 // tt, 3329 // For taskloops: 3330 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3331 // tt->task_data.shareds); 3332 auto *GtidParam = CGF.EmitLoadOfScalar( 3333 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3334 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3335 
CGF.GetAddrOfLocalVar(&TaskTypeArg), 3336 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3337 auto *KmpTaskTWithPrivatesQTyRD = 3338 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3339 LValue Base = 3340 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3341 auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3342 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3343 auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3344 auto *PartidParam = PartIdLVal.getPointer(); 3345 3346 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3347 auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3348 auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3349 CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(), 3350 CGF.ConvertTypeForMem(SharedsPtrTy)); 3351 3352 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3353 llvm::Value *PrivatesParam; 3354 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3355 auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3356 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3357 PrivatesLVal.getPointer(), CGF.VoidPtrTy); 3358 } else 3359 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3360 3361 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3362 TaskPrivatesMap, 3363 CGF.Builder 3364 .CreatePointerBitCastOrAddrSpaceCast( 3365 TDBase.getAddress(), CGF.VoidPtrTy) 3366 .getPointer()}; 3367 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3368 std::end(CommonArgs)); 3369 if (isOpenMPTaskLoopDirective(Kind)) { 3370 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3371 auto LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3372 auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal(); 3373 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3374 auto 
UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3375 auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal(); 3376 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3377 auto StLVal = CGF.EmitLValueForField(Base, *StFI); 3378 auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal(); 3379 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3380 auto LILVal = CGF.EmitLValueForField(Base, *LIFI); 3381 auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal(); 3382 CallArgs.push_back(LBParam); 3383 CallArgs.push_back(UBParam); 3384 CallArgs.push_back(StParam); 3385 CallArgs.push_back(LIParam); 3386 } 3387 CallArgs.push_back(SharedsParam); 3388 3389 CGF.EmitCallOrInvoke(TaskFunction, CallArgs); 3390 CGF.EmitStoreThroughLValue( 3391 RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3392 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3393 CGF.FinishFunction(); 3394 return TaskEntry; 3395 } 3396 3397 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3398 SourceLocation Loc, 3399 QualType KmpInt32Ty, 3400 QualType KmpTaskTWithPrivatesPtrQTy, 3401 QualType KmpTaskTWithPrivatesQTy) { 3402 auto &C = CGM.getContext(); 3403 FunctionArgList Args; 3404 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); 3405 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, 3406 /*Id=*/nullptr, 3407 KmpTaskTWithPrivatesPtrQTy.withRestrict()); 3408 Args.push_back(&GtidArg); 3409 Args.push_back(&TaskTypeArg); 3410 FunctionType::ExtInfo Info; 3411 auto &DestructorFnInfo = 3412 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3413 auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo); 3414 auto *DestructorFn = 3415 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3416 ".omp_task_destructor.", &CGM.getModule()); 3417 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn, 3418 DestructorFnInfo); 3419 CodeGenFunction CGF(CGM); 3420 
CGF.disableDebugInfo(); 3421 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3422 Args); 3423 3424 LValue Base = CGF.EmitLoadOfPointerLValue( 3425 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3426 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3427 auto *KmpTaskTWithPrivatesQTyRD = 3428 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3429 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3430 Base = CGF.EmitLValueForField(Base, *FI); 3431 for (auto *Field : 3432 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3433 if (auto DtorKind = Field->getType().isDestructedType()) { 3434 auto FieldLValue = CGF.EmitLValueForField(Base, Field); 3435 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 3436 } 3437 } 3438 CGF.FinishFunction(); 3439 return DestructorFn; 3440 } 3441 3442 /// \brief Emit a privates mapping function for correct handling of private and 3443 /// firstprivate variables. 3444 /// \code 3445 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 3446 /// **noalias priv1,..., <tyn> **noalias privn) { 3447 /// *priv1 = &.privates.priv1; 3448 /// ...; 3449 /// *privn = &.privates.privn; 3450 /// } 3451 /// \endcode 3452 static llvm::Value * 3453 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3454 ArrayRef<const Expr *> PrivateVars, 3455 ArrayRef<const Expr *> FirstprivateVars, 3456 ArrayRef<const Expr *> LastprivateVars, 3457 QualType PrivatesQTy, 3458 ArrayRef<PrivateDataTy> Privates) { 3459 auto &C = CGM.getContext(); 3460 FunctionArgList Args; 3461 ImplicitParamDecl TaskPrivatesArg( 3462 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3463 C.getPointerType(PrivatesQTy).withConst().withRestrict()); 3464 Args.push_back(&TaskPrivatesArg); 3465 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 3466 unsigned Counter = 1; 3467 for (auto *E: PrivateVars) { 3468 Args.push_back(ImplicitParamDecl::Create( 3469 C, /*DC=*/nullptr, Loc, 3470 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 3471 .withConst() 3472 .withRestrict())); 3473 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3474 PrivateVarsPos[VD] = Counter; 3475 ++Counter; 3476 } 3477 for (auto *E : FirstprivateVars) { 3478 Args.push_back(ImplicitParamDecl::Create( 3479 C, /*DC=*/nullptr, Loc, 3480 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 3481 .withConst() 3482 .withRestrict())); 3483 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3484 PrivateVarsPos[VD] = Counter; 3485 ++Counter; 3486 } 3487 for (auto *E: LastprivateVars) { 3488 Args.push_back(ImplicitParamDecl::Create( 3489 C, /*DC=*/nullptr, Loc, 3490 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 3491 .withConst() 3492 .withRestrict())); 3493 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3494 PrivateVarsPos[VD] = Counter; 3495 ++Counter; 3496 } 3497 auto &TaskPrivatesMapFnInfo = 3498 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3499 auto 
*TaskPrivatesMapTy = 3500 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3501 auto *TaskPrivatesMap = llvm::Function::Create( 3502 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, 3503 ".omp_task_privates_map.", &CGM.getModule()); 3504 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap, 3505 TaskPrivatesMapFnInfo); 3506 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3507 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3508 CodeGenFunction CGF(CGM); 3509 CGF.disableDebugInfo(); 3510 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3511 TaskPrivatesMapFnInfo, Args); 3512 3513 // *privi = &.privates.privi; 3514 LValue Base = CGF.EmitLoadOfPointerLValue( 3515 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3516 TaskPrivatesArg.getType()->castAs<PointerType>()); 3517 auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3518 Counter = 0; 3519 for (auto *Field : PrivatesQTyRD->fields()) { 3520 auto FieldLVal = CGF.EmitLValueForField(Base, Field); 3521 auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3522 auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3523 auto RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3524 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 3525 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 3526 ++Counter; 3527 } 3528 CGF.FinishFunction(); 3529 return TaskPrivatesMap; 3530 } 3531 3532 static int array_pod_sort_comparator(const PrivateDataTy *P1, 3533 const PrivateDataTy *P2) { 3534 return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0); 3535 } 3536 3537 /// Emit initialization for private variables in task-based directives. 
/// Walks \p Privates in step with the fields of the privates record and emits
/// the initializer of each private copy into its field.
/// \param KmpTaskSharedsPtr Address of the shareds block; only used when
/// \p Data has firstprivate variables.
/// \param TDBase LValue of the kmp_task_t_with_privates object (the privates
/// field is looked up as its second field).
/// \param ForDup When true, only copies whose initializer is a non-trivial
/// constructor call are (re)initialized.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  auto &C = CGF.getContext();
  // Second field of the wrapper record is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // SrcBase stays default-constructed unless there are firstprivates; it is
  // only read below for entries that carry a PrivateElemInit.
  LValue SrcBase;
  if (!Data.FirstprivateVars.empty()) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
      cast<CapturedStmt>(*D.getAssociatedStmt()));
  // From here on FI iterates over the fields of the privates record itself.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (auto &&Pair : Privates) {
    auto *VD = Pair.second.PrivateCopy;
    auto *Init = VD->getAnyInitializer();
    // Skip copies without an initializer; for the dup function also skip
    // everything except non-trivial constructor calls.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (auto *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: initialize from the captured original in the shareds.
        auto *OriginalVD = Pair.second.Original;
        auto *SharedField = CapturesInfo.lookup(OriginalVD);
        auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
        // Rebuild the lvalue with the declared alignment of the original.
        SharedRefLValue = CGF.MakeAddrLValue(
            Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
            SharedRefLValue.getType(), AlignmentSource::Decl);
        QualType Type = OriginalVD->getType();
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
                                    SharedRefLValue.getAddress(), Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Bind the helper variable to the current source element.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: bind the helper variable to the shared
          // original and emit the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else
        // Private/lastprivate: the initializer does not read the original.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
    }
    // Advance to the next field even when nothing was emitted for this one.
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
3616 static bool checkInitIsRequired(CodeGenFunction &CGF, 3617 ArrayRef<PrivateDataTy> Privates) { 3618 bool InitRequired = false; 3619 for (auto &&Pair : Privates) { 3620 auto *VD = Pair.second.PrivateCopy; 3621 auto *Init = VD->getAnyInitializer(); 3622 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3623 !CGF.isTrivialInitializer(Init)); 3624 } 3625 return InitRequired; 3626 } 3627 3628 3629 /// Emit task_dup function (for initialization of 3630 /// private/firstprivate/lastprivate vars and last_iter flag) 3631 /// \code 3632 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3633 /// lastpriv) { 3634 /// // setup lastprivate flag 3635 /// task_dst->last = lastpriv; 3636 /// // could be constructor calls here... 3637 /// } 3638 /// \endcode 3639 static llvm::Value * 3640 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3641 const OMPExecutableDirective &D, 3642 QualType KmpTaskTWithPrivatesPtrQTy, 3643 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3644 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3645 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3646 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3647 auto &C = CGM.getContext(); 3648 FunctionArgList Args; 3649 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, 3650 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); 3651 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, 3652 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); 3653 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, 3654 /*Id=*/nullptr, C.IntTy); 3655 Args.push_back(&DstArg); 3656 Args.push_back(&SrcArg); 3657 Args.push_back(&LastprivArg); 3658 auto &TaskDupFnInfo = 3659 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3660 auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3661 auto *TaskDup = 3662 llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage, 3663 ".omp_task_dup.", &CGM.getModule()); 3664 
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo); 3665 CodeGenFunction CGF(CGM); 3666 CGF.disableDebugInfo(); 3667 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args); 3668 3669 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3670 CGF.GetAddrOfLocalVar(&DstArg), 3671 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3672 // task_dst->liter = lastpriv; 3673 if (WithLastIter) { 3674 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3675 LValue Base = CGF.EmitLValueForField( 3676 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3677 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3678 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3679 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3680 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3681 } 3682 3683 // Emit initial values for private copies (if any). 3684 assert(!Privates.empty()); 3685 Address KmpTaskSharedsPtr = Address::invalid(); 3686 if (!Data.FirstprivateVars.empty()) { 3687 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3688 CGF.GetAddrOfLocalVar(&SrcArg), 3689 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3690 LValue Base = CGF.EmitLValueForField( 3691 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3692 KmpTaskSharedsPtr = Address( 3693 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 3694 Base, *std::next(KmpTaskTQTyRD->field_begin(), 3695 KmpTaskTShareds)), 3696 Loc), 3697 CGF.getNaturalTypeAlignment(SharedsTy)); 3698 } 3699 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 3700 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 3701 CGF.FinishFunction(); 3702 return TaskDup; 3703 } 3704 3705 /// Checks if destructor function is required to be generated. 3706 /// \return true if cleanups are required, false otherwise. 
// Looks at the fields of the privates record (second field of the wrapper
// record) and reports whether any of them has a destructed type. The second
// field is dereferenced unconditionally, so the wrapper must have one.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
  bool NeedsCleanup = false;
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
  for (auto *FD : PrivateRD->fields()) {
    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
    // Stop at the first field that needs a destructor.
    if (NeedsCleanup)
      break;
  }
  return NeedsCleanup;
}

/// Emit the allocation and initialization of a task object.
///
/// Collects the private/firstprivate/lastprivate copies from \p Data, builds
/// the task record types, emits the helper functions (privates map, proxy
/// entry, and where needed the task-dup and destructors functions), calls
/// __kmpc_omp_task_alloc, copies the shareds and initializes the privates.
/// \param TaskFunction Outlined task function; the type of its fourth
/// parameter is used as the type of the privates-map argument.
/// \param SharedsTy Record type of the captured shared variables.
/// \param Shareds Address of the shareds to copy into the new task.
/// \return The new task pointer (raw and cast), the proxy entry, the LValue
/// of the embedded kmp_task_t, its record declaration and, if emitted, the
/// task-dup function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Value *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  auto &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // Each *Copies list is walked in parallel with its *Vars list.
  auto I = Data.PrivateCopies.begin();
  for (auto *E : Data.PrivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (auto *E : Data.FirstprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    // Only firstprivates carry an element-init helper variable.
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (auto *E : Data.LastprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  llvm::array_pod_sort(Privates.begin(), Privates.end(),
                       array_pod_sort_comparator);
  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // NOTE(review): the record is cached after the first call, using that
  // call's directive kind. createKmpTaskTRecordDecl adds the lb/ub/st/liter
  // fields only for taskloop directives, so if a plain task is emitted first,
  // a later taskloop appears to reuse a record without those fields - confirm
  // whether a separate cached type is needed for taskloops.
  if (KmpTaskTQTy.isNull()) {
    KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
        CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
  }
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  auto *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
  auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map argument takes the type of TaskFunction's fourth
  // parameter (index 3).
  auto *TaskPrivatesMapTy =
      std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null mapping function.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  auto *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  // Flags known at compile time are collected in 'Flags'; the 'final' flag
  // may depend on a runtime value and is merged in below.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // If a 'final' value is available as an llvm::Value, select on it;
  // otherwise use the constant carried in Data.Final.
  auto *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  auto *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      NewTask, KmpTaskTWithPrivatesPtrTy);
  // Base is the whole kmp_task_t_with_privates object; TDBase is its first
  // field, the embedded kmp_task_t.
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    // The destination is whatever the task's 'shareds' field points to.
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    // emitPrivatesInit receives the wrapper object (Base), not TDBase.
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task-dup function is emitted only for taskloops, and only if there
    // are lastprivates or a private copy needs a non-trivial constructor.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    // The destructors function pointer goes into data1.
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    // The priority value goes into data2.
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Value *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Value *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  auto &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
3925 enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 }; 3926 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 3927 RecordDecl *KmpDependInfoRD; 3928 QualType FlagsTy = 3929 C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 3930 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 3931 if (KmpDependInfoTy.isNull()) { 3932 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 3933 KmpDependInfoRD->startDefinition(); 3934 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 3935 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 3936 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 3937 KmpDependInfoRD->completeDefinition(); 3938 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 3939 } else 3940 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 3941 CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy); 3942 // Define type kmp_depend_info[<Dependences.size()>]; 3943 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 3944 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), 3945 ArrayType::Normal, /*IndexTypeQuals=*/0); 3946 // kmp_depend_info[<Dependences.size()>] deps; 3947 DependenciesArray = 3948 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 3949 for (unsigned i = 0; i < NumDependencies; ++i) { 3950 const Expr *E = Data.Dependences[i].second; 3951 auto Addr = CGF.EmitLValue(E); 3952 llvm::Value *Size; 3953 QualType Ty = E->getType(); 3954 if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 3955 LValue UpAddrLVal = 3956 CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false); 3957 llvm::Value *UpAddr = 3958 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); 3959 llvm::Value *LowIntPtr = 3960 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); 3961 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 3962 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 3963 } else 3964 Size = 
CGF.getTypeSize(Ty); 3965 auto Base = CGF.MakeAddrLValue( 3966 CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize), 3967 KmpDependInfoTy); 3968 // deps[i].base_addr = &<Dependences[i].second>; 3969 auto BaseAddrLVal = CGF.EmitLValueForField( 3970 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 3971 CGF.EmitStoreOfScalar( 3972 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), 3973 BaseAddrLVal); 3974 // deps[i].len = sizeof(<Dependences[i].second>); 3975 auto LenLVal = CGF.EmitLValueForField( 3976 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 3977 CGF.EmitStoreOfScalar(Size, LenLVal); 3978 // deps[i].flags = <Dependences[i].first>; 3979 RTLDependenceKindTy DepKind; 3980 switch (Data.Dependences[i].first) { 3981 case OMPC_DEPEND_in: 3982 DepKind = DepIn; 3983 break; 3984 // Out and InOut dependencies must use the same code. 3985 case OMPC_DEPEND_out: 3986 case OMPC_DEPEND_inout: 3987 DepKind = DepInOut; 3988 break; 3989 case OMPC_DEPEND_source: 3990 case OMPC_DEPEND_sink: 3991 case OMPC_DEPEND_unknown: 3992 llvm_unreachable("Unknown task dependence type"); 3993 } 3994 auto FlagsLVal = CGF.EmitLValueForField( 3995 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 3996 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 3997 FlagsLVal); 3998 } 3999 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4000 CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()), 4001 CGF.VoidPtrTy); 4002 } 4003 4004 // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() 4005 // libcall. 
4006 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 4007 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 4008 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 4009 // list is not empty 4010 auto *ThreadID = getThreadID(CGF, Loc); 4011 auto *UpLoc = emitUpdateLocation(CGF, Loc); 4012 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 4013 llvm::Value *DepTaskArgs[7]; 4014 if (NumDependencies) { 4015 DepTaskArgs[0] = UpLoc; 4016 DepTaskArgs[1] = ThreadID; 4017 DepTaskArgs[2] = NewTask; 4018 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); 4019 DepTaskArgs[4] = DependenciesArray.getPointer(); 4020 DepTaskArgs[5] = CGF.Builder.getInt32(0); 4021 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4022 } 4023 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies, 4024 &TaskArgs, 4025 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 4026 if (!Data.Tied) { 4027 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 4028 auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 4029 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 4030 } 4031 if (NumDependencies) { 4032 CGF.EmitRuntimeCall( 4033 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); 4034 } else { 4035 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 4036 TaskArgs); 4037 } 4038 // Check if parent region is untied and build return for untied task; 4039 if (auto *Region = 4040 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 4041 Region->emitUntiedSwitch(CGF); 4042 }; 4043 4044 llvm::Value *DepWaitTaskArgs[6]; 4045 if (NumDependencies) { 4046 DepWaitTaskArgs[0] = UpLoc; 4047 DepWaitTaskArgs[1] = ThreadID; 4048 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); 4049 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 4050 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 4051 DepWaitTaskArgs[5] = 
llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4052 } 4053 auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 4054 NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF, 4055 PrePostActionTy &) { 4056 auto &RT = CGF.CGM.getOpenMPRuntime(); 4057 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 4058 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 4059 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 4060 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 4061 // is specified. 4062 if (NumDependencies) 4063 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), 4064 DepWaitTaskArgs); 4065 // Call proxy_task_entry(gtid, new_task); 4066 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy]( 4067 CodeGenFunction &CGF, PrePostActionTy &Action) { 4068 Action.Enter(CGF); 4069 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 4070 CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); 4071 }; 4072 4073 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 4074 // kmp_task_t *new_task); 4075 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 4076 // kmp_task_t *new_task); 4077 RegionCodeGenTy RCG(CodeGen); 4078 CommonActionTy Action( 4079 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, 4080 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); 4081 RCG.setAction(Action); 4082 RCG(CGF); 4083 }; 4084 4085 if (IfCond) 4086 emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 4087 else { 4088 RegionCodeGenTy ThenRCG(ThenCodeGen); 4089 ThenRCG(CGF); 4090 } 4091 } 4092 4093 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 4094 const OMPLoopDirective &D, 4095 llvm::Value *TaskFunction, 4096 QualType SharedsTy, Address Shareds, 4097 const Expr *IfCond, 4098 const OMPTaskDataTy &Data) { 4099 if (!CGF.HaveInsertPoint()) 4100 return; 4101 TaskResultTy Result = 4102 
emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 4103 // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() 4104 // libcall. 4105 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 4106 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 4107 // sched, kmp_uint64 grainsize, void *task_dup); 4108 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4109 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 4110 llvm::Value *IfVal; 4111 if (IfCond) { 4112 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 4113 /*isSigned=*/true); 4114 } else 4115 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 4116 4117 LValue LBLVal = CGF.EmitLValueForField( 4118 Result.TDBase, 4119 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 4120 auto *LBVar = 4121 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 4122 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), 4123 /*IsInitializer=*/true); 4124 LValue UBLVal = CGF.EmitLValueForField( 4125 Result.TDBase, 4126 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 4127 auto *UBVar = 4128 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 4129 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), 4130 /*IsInitializer=*/true); 4131 LValue StLVal = CGF.EmitLValueForField( 4132 Result.TDBase, 4133 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 4134 auto *StVar = 4135 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 4136 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), 4137 /*IsInitializer=*/true); 4138 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 4139 llvm::Value *TaskArgs[] = { 4140 UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(), 4141 UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()), 
4142 llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0), 4143 llvm::ConstantInt::getSigned( 4144 CGF.IntTy, Data.Schedule.getPointer() 4145 ? Data.Schedule.getInt() ? NumTasks : Grainsize 4146 : NoSchedule), 4147 Data.Schedule.getPointer() 4148 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 4149 /*isSigned=*/false) 4150 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 4151 Result.TaskDupFn 4152 ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn, 4153 CGF.VoidPtrTy) 4154 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 4155 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); 4156 } 4157 4158 /// \brief Emit reduction operation for each element of array (required for 4159 /// array sections) LHS op = RHS. 4160 /// \param Type Type of array. 4161 /// \param LHSVar Variable on the left side of the reduction operation 4162 /// (references element of array in original variable). 4163 /// \param RHSVar Variable on the right side of the reduction operation 4164 /// (references element of array in original variable). 4165 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 4166 /// RHSVar. 4167 static void EmitOMPAggregateReduction( 4168 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 4169 const VarDecl *RHSVar, 4170 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 4171 const Expr *, const Expr *)> &RedOpGen, 4172 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 4173 const Expr *UpExpr = nullptr) { 4174 // Perform element-by-element initialization. 4175 QualType ElementTy; 4176 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 4177 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 4178 4179 // Drill down to the base element type on both arrays. 
4180 auto ArrayTy = Type->getAsArrayTypeUnsafe(); 4181 auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 4182 4183 auto RHSBegin = RHSAddr.getPointer(); 4184 auto LHSBegin = LHSAddr.getPointer(); 4185 // Cast from pointer to array type to pointer to single element. 4186 auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 4187 // The basic structure here is a while-do loop. 4188 auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 4189 auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 4190 auto IsEmpty = 4191 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 4192 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 4193 4194 // Enter the loop body, making that address the current address. 4195 auto EntryBB = CGF.Builder.GetInsertBlock(); 4196 CGF.EmitBlock(BodyBB); 4197 4198 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 4199 4200 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 4201 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 4202 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 4203 Address RHSElementCurrent = 4204 Address(RHSElementPHI, 4205 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 4206 4207 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 4208 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 4209 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 4210 Address LHSElementCurrent = 4211 Address(LHSElementPHI, 4212 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 4213 4214 // Emit copy. 4215 CodeGenFunction::OMPPrivateScope Scope(CGF); 4216 Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; }); 4217 Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; }); 4218 Scope.Privatize(); 4219 RedOpGen(CGF, XExpr, EExpr, UpExpr); 4220 Scope.ForceCleanup(); 4221 4222 // Shift the address forward by one element. 
4223 auto LHSElementNext = CGF.Builder.CreateConstGEP1_32( 4224 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 4225 auto RHSElementNext = CGF.Builder.CreateConstGEP1_32( 4226 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 4227 // Check whether we've reached the end. 4228 auto Done = 4229 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 4230 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 4231 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 4232 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 4233 4234 // Done. 4235 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 4236 } 4237 4238 /// Emit reduction combiner. If the combiner is a simple expression emit it as 4239 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 4240 /// UDR combiner function. 4241 static void emitReductionCombiner(CodeGenFunction &CGF, 4242 const Expr *ReductionOp) { 4243 if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) 4244 if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 4245 if (auto *DRE = 4246 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 4247 if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 4248 std::pair<llvm::Function *, llvm::Function *> Reduction = 4249 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 4250 RValue Func = RValue::get(Reduction.first); 4251 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 4252 CGF.EmitIgnoredExpr(ReductionOp); 4253 return; 4254 } 4255 CGF.EmitIgnoredExpr(ReductionOp); 4256 } 4257 4258 llvm::Value *CGOpenMPRuntime::emitReductionFunction( 4259 CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 4260 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 4261 ArrayRef<const Expr *> ReductionOps) { 4262 auto &C = CGM.getContext(); 4263 4264 // void reduction_func(void *LHSArg, void *RHSArg); 4265 FunctionArgList Args; 4266 ImplicitParamDecl LHSArg(C, 
/*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 4267 C.VoidPtrTy); 4268 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 4269 C.VoidPtrTy); 4270 Args.push_back(&LHSArg); 4271 Args.push_back(&RHSArg); 4272 auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4273 auto *Fn = llvm::Function::Create( 4274 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 4275 ".omp.reduction.reduction_func", &CGM.getModule()); 4276 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); 4277 CodeGenFunction CGF(CGM); 4278 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 4279 4280 // Dst = (void*[n])(LHSArg); 4281 // Src = (void*[n])(RHSArg); 4282 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4283 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 4284 ArgsType), CGF.getPointerAlign()); 4285 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4286 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 4287 ArgsType), CGF.getPointerAlign()); 4288 4289 // ... 4290 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 4291 // ... 4292 CodeGenFunction::OMPPrivateScope Scope(CGF); 4293 auto IPriv = Privates.begin(); 4294 unsigned Idx = 0; 4295 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 4296 auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 4297 Scope.addPrivate(RHSVar, [&]() -> Address { 4298 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 4299 }); 4300 auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 4301 Scope.addPrivate(LHSVar, [&]() -> Address { 4302 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 4303 }); 4304 QualType PrivTy = (*IPriv)->getType(); 4305 if (PrivTy->isVariablyModifiedType()) { 4306 // Get array size and emit VLA type. 
4307 ++Idx; 4308 Address Elem = 4309 CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize()); 4310 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 4311 auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy); 4312 auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 4313 CodeGenFunction::OpaqueValueMapping OpaqueMap( 4314 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 4315 CGF.EmitVariablyModifiedType(PrivTy); 4316 } 4317 } 4318 Scope.Privatize(); 4319 IPriv = Privates.begin(); 4320 auto ILHS = LHSExprs.begin(); 4321 auto IRHS = RHSExprs.begin(); 4322 for (auto *E : ReductionOps) { 4323 if ((*IPriv)->getType()->isArrayType()) { 4324 // Emit reduction for array section. 4325 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 4326 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 4327 EmitOMPAggregateReduction( 4328 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 4329 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 4330 emitReductionCombiner(CGF, E); 4331 }); 4332 } else 4333 // Emit reduction for array subscript or single variable. 4334 emitReductionCombiner(CGF, E); 4335 ++IPriv; 4336 ++ILHS; 4337 ++IRHS; 4338 } 4339 Scope.ForceCleanup(); 4340 CGF.FinishFunction(); 4341 return Fn; 4342 } 4343 4344 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 4345 const Expr *ReductionOp, 4346 const Expr *PrivateRef, 4347 const DeclRefExpr *LHS, 4348 const DeclRefExpr *RHS) { 4349 if (PrivateRef->getType()->isArrayType()) { 4350 // Emit reduction for array section. 4351 auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 4352 auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 4353 EmitOMPAggregateReduction( 4354 CGF, PrivateRef->getType(), LHSVar, RHSVar, 4355 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 4356 emitReductionCombiner(CGF, ReductionOp); 4357 }); 4358 } else 4359 // Emit reduction for array subscript or single variable. 
4360 emitReductionCombiner(CGF, ReductionOp); 4361 } 4362 4363 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 4364 ArrayRef<const Expr *> Privates, 4365 ArrayRef<const Expr *> LHSExprs, 4366 ArrayRef<const Expr *> RHSExprs, 4367 ArrayRef<const Expr *> ReductionOps, 4368 ReductionOptionsTy Options) { 4369 if (!CGF.HaveInsertPoint()) 4370 return; 4371 4372 bool WithNowait = Options.WithNowait; 4373 bool SimpleReduction = Options.SimpleReduction; 4374 4375 // Next code should be emitted for reduction: 4376 // 4377 // static kmp_critical_name lock = { 0 }; 4378 // 4379 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 4380 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 4381 // ... 4382 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 4383 // *(Type<n>-1*)rhs[<n>-1]); 4384 // } 4385 // 4386 // ... 4387 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 4388 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 4389 // RedList, reduce_func, &<lock>)) { 4390 // case 1: 4391 // ... 4392 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 4393 // ... 4394 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 4395 // break; 4396 // case 2: 4397 // ... 4398 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 4399 // ... 4400 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 4401 // break; 4402 // default:; 4403 // } 4404 // 4405 // if SimpleReduction is true, only the next code is generated: 4406 // ... 4407 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 4408 // ... 
4409 4410 auto &C = CGM.getContext(); 4411 4412 if (SimpleReduction) { 4413 CodeGenFunction::RunCleanupsScope Scope(CGF); 4414 auto IPriv = Privates.begin(); 4415 auto ILHS = LHSExprs.begin(); 4416 auto IRHS = RHSExprs.begin(); 4417 for (auto *E : ReductionOps) { 4418 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 4419 cast<DeclRefExpr>(*IRHS)); 4420 ++IPriv; 4421 ++ILHS; 4422 ++IRHS; 4423 } 4424 return; 4425 } 4426 4427 // 1. Build a list of reduction variables. 4428 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 4429 auto Size = RHSExprs.size(); 4430 for (auto *E : Privates) { 4431 if (E->getType()->isVariablyModifiedType()) 4432 // Reserve place for array size. 4433 ++Size; 4434 } 4435 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 4436 QualType ReductionArrayTy = 4437 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 4438 /*IndexTypeQuals=*/0); 4439 Address ReductionList = 4440 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 4441 auto IPriv = Privates.begin(); 4442 unsigned Idx = 0; 4443 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 4444 Address Elem = 4445 CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize()); 4446 CGF.Builder.CreateStore( 4447 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4448 CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), 4449 Elem); 4450 if ((*IPriv)->getType()->isVariablyModifiedType()) { 4451 // Store array size. 4452 ++Idx; 4453 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, 4454 CGF.getPointerSize()); 4455 llvm::Value *Size = CGF.Builder.CreateIntCast( 4456 CGF.getVLASize( 4457 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 4458 .first, 4459 CGF.SizeTy, /*isSigned=*/false); 4460 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 4461 Elem); 4462 } 4463 } 4464 4465 // 2. Emit reduce_func(). 
4466 auto *ReductionFn = emitReductionFunction( 4467 CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 4468 LHSExprs, RHSExprs, ReductionOps); 4469 4470 // 3. Create static kmp_critical_name lock = { 0 }; 4471 auto *Lock = getCriticalRegionLock(".reduction"); 4472 4473 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 4474 // RedList, reduce_func, &<lock>); 4475 auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 4476 auto *ThreadId = getThreadID(CGF, Loc); 4477 auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 4478 auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4479 ReductionList.getPointer(), CGF.VoidPtrTy); 4480 llvm::Value *Args[] = { 4481 IdentTLoc, // ident_t *<loc> 4482 ThreadId, // i32 <gtid> 4483 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 4484 ReductionArrayTySize, // size_type sizeof(RedList) 4485 RL, // void *RedList 4486 ReductionFn, // void (*) (void *, void *) <reduce_func> 4487 Lock // kmp_critical_name *&<lock> 4488 }; 4489 auto Res = CGF.EmitRuntimeCall( 4490 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 4491 : OMPRTL__kmpc_reduce), 4492 Args); 4493 4494 // 5. Build switch(res) 4495 auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 4496 auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 4497 4498 // 6. Build case 1: 4499 // ... 4500 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 4501 // ... 
4502 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 4503 // break; 4504 auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 4505 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 4506 CGF.EmitBlock(Case1BB); 4507 4508 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 4509 llvm::Value *EndArgs[] = { 4510 IdentTLoc, // ident_t *<loc> 4511 ThreadId, // i32 <gtid> 4512 Lock // kmp_critical_name *&<lock> 4513 }; 4514 auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps]( 4515 CodeGenFunction &CGF, PrePostActionTy &Action) { 4516 auto &RT = CGF.CGM.getOpenMPRuntime(); 4517 auto IPriv = Privates.begin(); 4518 auto ILHS = LHSExprs.begin(); 4519 auto IRHS = RHSExprs.begin(); 4520 for (auto *E : ReductionOps) { 4521 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 4522 cast<DeclRefExpr>(*IRHS)); 4523 ++IPriv; 4524 ++ILHS; 4525 ++IRHS; 4526 } 4527 }; 4528 RegionCodeGenTy RCG(CodeGen); 4529 CommonActionTy Action( 4530 nullptr, llvm::None, 4531 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait 4532 : OMPRTL__kmpc_end_reduce), 4533 EndArgs); 4534 RCG.setAction(Action); 4535 RCG(CGF); 4536 4537 CGF.EmitBranch(DefaultBB); 4538 4539 // 7. Build case 2: 4540 // ... 4541 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 4542 // ... 
4543 // break; 4544 auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 4545 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 4546 CGF.EmitBlock(Case2BB); 4547 4548 auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps]( 4549 CodeGenFunction &CGF, PrePostActionTy &Action) { 4550 auto ILHS = LHSExprs.begin(); 4551 auto IRHS = RHSExprs.begin(); 4552 auto IPriv = Privates.begin(); 4553 for (auto *E : ReductionOps) { 4554 const Expr *XExpr = nullptr; 4555 const Expr *EExpr = nullptr; 4556 const Expr *UpExpr = nullptr; 4557 BinaryOperatorKind BO = BO_Comma; 4558 if (auto *BO = dyn_cast<BinaryOperator>(E)) { 4559 if (BO->getOpcode() == BO_Assign) { 4560 XExpr = BO->getLHS(); 4561 UpExpr = BO->getRHS(); 4562 } 4563 } 4564 // Try to emit update expression as a simple atomic. 4565 auto *RHSExpr = UpExpr; 4566 if (RHSExpr) { 4567 // Analyze RHS part of the whole expression. 4568 if (auto *ACO = dyn_cast<AbstractConditionalOperator>( 4569 RHSExpr->IgnoreParenImpCasts())) { 4570 // If this is a conditional operator, analyze its condition for 4571 // min/max reduction operator. 
4572 RHSExpr = ACO->getCond(); 4573 } 4574 if (auto *BORHS = 4575 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 4576 EExpr = BORHS->getRHS(); 4577 BO = BORHS->getOpcode(); 4578 } 4579 } 4580 if (XExpr) { 4581 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 4582 auto &&AtomicRedGen = [BO, VD, 4583 Loc](CodeGenFunction &CGF, const Expr *XExpr, 4584 const Expr *EExpr, const Expr *UpExpr) { 4585 LValue X = CGF.EmitLValue(XExpr); 4586 RValue E; 4587 if (EExpr) 4588 E = CGF.EmitAnyExpr(EExpr); 4589 CGF.EmitOMPAtomicSimpleUpdateExpr( 4590 X, E, BO, /*IsXLHSInRHSPart=*/true, 4591 llvm::AtomicOrdering::Monotonic, Loc, 4592 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 4593 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4594 PrivateScope.addPrivate( 4595 VD, [&CGF, VD, XRValue, Loc]() -> Address { 4596 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 4597 CGF.emitOMPSimpleStore( 4598 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 4599 VD->getType().getNonReferenceType(), Loc); 4600 return LHSTemp; 4601 }); 4602 (void)PrivateScope.Privatize(); 4603 return CGF.EmitAnyExpr(UpExpr); 4604 }); 4605 }; 4606 if ((*IPriv)->getType()->isArrayType()) { 4607 // Emit atomic reduction for array section. 4608 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 4609 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 4610 AtomicRedGen, XExpr, EExpr, UpExpr); 4611 } else 4612 // Emit atomic reduction for array subscript or single variable. 4613 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 4614 } else { 4615 // Emit as a critical region. 
4616 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 4617 const Expr *, const Expr *) { 4618 auto &RT = CGF.CGM.getOpenMPRuntime(); 4619 RT.emitCriticalRegion( 4620 CGF, ".atomic_reduction", 4621 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 4622 Action.Enter(CGF); 4623 emitReductionCombiner(CGF, E); 4624 }, 4625 Loc); 4626 }; 4627 if ((*IPriv)->getType()->isArrayType()) { 4628 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 4629 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 4630 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 4631 CritRedGen); 4632 } else 4633 CritRedGen(CGF, nullptr, nullptr, nullptr); 4634 } 4635 ++ILHS; 4636 ++IRHS; 4637 ++IPriv; 4638 } 4639 }; 4640 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 4641 if (!WithNowait) { 4642 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 4643 llvm::Value *EndArgs[] = { 4644 IdentTLoc, // ident_t *<loc> 4645 ThreadId, // i32 <gtid> 4646 Lock // kmp_critical_name *&<lock> 4647 }; 4648 CommonActionTy Action(nullptr, llvm::None, 4649 createRuntimeFunction(OMPRTL__kmpc_end_reduce), 4650 EndArgs); 4651 AtomicRCG.setAction(Action); 4652 AtomicRCG(CGF); 4653 } else 4654 AtomicRCG(CGF); 4655 4656 CGF.EmitBranch(DefaultBB); 4657 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 4658 } 4659 4660 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 4661 SourceLocation Loc) { 4662 if (!CGF.HaveInsertPoint()) 4663 return; 4664 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 4665 // global_tid); 4666 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 4667 // Ignore return result until untied tasks are supported. 
4668 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 4669 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 4670 Region->emitUntiedSwitch(CGF); 4671 } 4672 4673 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 4674 OpenMPDirectiveKind InnerKind, 4675 const RegionCodeGenTy &CodeGen, 4676 bool HasCancel) { 4677 if (!CGF.HaveInsertPoint()) 4678 return; 4679 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 4680 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 4681 } 4682 4683 namespace { 4684 enum RTCancelKind { 4685 CancelNoreq = 0, 4686 CancelParallel = 1, 4687 CancelLoop = 2, 4688 CancelSections = 3, 4689 CancelTaskgroup = 4 4690 }; 4691 } // anonymous namespace 4692 4693 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 4694 RTCancelKind CancelKind = CancelNoreq; 4695 if (CancelRegion == OMPD_parallel) 4696 CancelKind = CancelParallel; 4697 else if (CancelRegion == OMPD_for) 4698 CancelKind = CancelLoop; 4699 else if (CancelRegion == OMPD_sections) 4700 CancelKind = CancelSections; 4701 else { 4702 assert(CancelRegion == OMPD_taskgroup); 4703 CancelKind = CancelTaskgroup; 4704 } 4705 return CancelKind; 4706 } 4707 4708 void CGOpenMPRuntime::emitCancellationPointCall( 4709 CodeGenFunction &CGF, SourceLocation Loc, 4710 OpenMPDirectiveKind CancelRegion) { 4711 if (!CGF.HaveInsertPoint()) 4712 return; 4713 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 4714 // global_tid, kmp_int32 cncl_kind); 4715 if (auto *OMPRegionInfo = 4716 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 4717 // For 'cancellation point taskgroup', the task region info may not have a 4718 // cancel. This may instead happen in another adjacent task. 
4719 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 4720 llvm::Value *Args[] = { 4721 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 4722 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 4723 // Ignore return result until untied tasks are supported. 4724 auto *Result = CGF.EmitRuntimeCall( 4725 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 4726 // if (__kmpc_cancellationpoint()) { 4727 // exit from construct; 4728 // } 4729 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 4730 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 4731 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 4732 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 4733 CGF.EmitBlock(ExitBB); 4734 // exit from construct; 4735 auto CancelDest = 4736 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 4737 CGF.EmitBranchThroughCleanup(CancelDest); 4738 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 4739 } 4740 } 4741 } 4742 4743 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 4744 const Expr *IfCond, 4745 OpenMPDirectiveKind CancelRegion) { 4746 if (!CGF.HaveInsertPoint()) 4747 return; 4748 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 4749 // kmp_int32 cncl_kind); 4750 if (auto *OMPRegionInfo = 4751 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 4752 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 4753 PrePostActionTy &) { 4754 auto &RT = CGF.CGM.getOpenMPRuntime(); 4755 llvm::Value *Args[] = { 4756 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 4757 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 4758 // Ignore return result until untied tasks are supported. 
4759 auto *Result = CGF.EmitRuntimeCall( 4760 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 4761 // if (__kmpc_cancel()) { 4762 // exit from construct; 4763 // } 4764 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 4765 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 4766 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 4767 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 4768 CGF.EmitBlock(ExitBB); 4769 // exit from construct; 4770 auto CancelDest = 4771 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 4772 CGF.EmitBranchThroughCleanup(CancelDest); 4773 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 4774 }; 4775 if (IfCond) 4776 emitOMPIfClause(CGF, IfCond, ThenGen, 4777 [](CodeGenFunction &, PrePostActionTy &) {}); 4778 else { 4779 RegionCodeGenTy ThenRCG(ThenGen); 4780 ThenRCG(CGF); 4781 } 4782 } 4783 } 4784 4785 /// \brief Obtain information that uniquely identifies a target entry. This 4786 /// consists of the file and device IDs as well as line number associated with 4787 /// the relevant entry source location. 
4788 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 4789 unsigned &DeviceID, unsigned &FileID, 4790 unsigned &LineNum) { 4791 4792 auto &SM = C.getSourceManager(); 4793 4794 // The loc should be always valid and have a file ID (the user cannot use 4795 // #pragma directives in macros) 4796 4797 assert(Loc.isValid() && "Source location is expected to be always valid."); 4798 assert(Loc.isFileID() && "Source location is expected to refer to a file."); 4799 4800 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 4801 assert(PLoc.isValid() && "Source location is expected to be always valid."); 4802 4803 llvm::sys::fs::UniqueID ID; 4804 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 4805 llvm_unreachable("Source file with target region no longer exists!"); 4806 4807 DeviceID = ID.getDevice(); 4808 FileID = ID.getFile(); 4809 LineNum = PLoc.getLine(); 4810 } 4811 4812 void CGOpenMPRuntime::emitTargetOutlinedFunction( 4813 const OMPExecutableDirective &D, StringRef ParentName, 4814 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 4815 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 4816 assert(!ParentName.empty() && "Invalid target region parent name!"); 4817 4818 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 4819 IsOffloadEntry, CodeGen); 4820 } 4821 4822 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 4823 const OMPExecutableDirective &D, StringRef ParentName, 4824 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 4825 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 4826 // Create a unique name for the entry function using the source location 4827 // information of the current target region. 
The name will be something like: 4828 // 4829 // __omp_offloading_DD_FFFF_PP_lBB 4830 // 4831 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 4832 // mangled name of the function that encloses the target region and BB is the 4833 // line number of the target region. 4834 4835 unsigned DeviceID; 4836 unsigned FileID; 4837 unsigned Line; 4838 getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID, 4839 Line); 4840 SmallString<64> EntryFnName; 4841 { 4842 llvm::raw_svector_ostream OS(EntryFnName); 4843 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 4844 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 4845 } 4846 4847 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 4848 4849 CodeGenFunction CGF(CGM, true); 4850 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 4851 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 4852 4853 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); 4854 4855 // If this target outline function is not an offload entry, we don't need to 4856 // register it. 4857 if (!IsOffloadEntry) 4858 return; 4859 4860 // The target region ID is used by the runtime library to identify the current 4861 // target region, so it only has to be unique and not necessarily point to 4862 // anything. It could be the pointer to the outlined function that implements 4863 // the target region, but we aren't using that so that the compiler doesn't 4864 // need to keep that, and could therefore inline the host function if proven 4865 // worthwhile during optimization. In the other hand, if emitting code for the 4866 // device, the ID has to be the function address so that it can retrieved from 4867 // the offloading entry and launched by the runtime library. We also mark the 4868 // outlined function to have external linkage in case we are emitting code for 4869 // the device, because these functions will be entry points to the device. 
4870 4871 if (CGM.getLangOpts().OpenMPIsDevice) { 4872 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 4873 OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage); 4874 } else 4875 OutlinedFnID = new llvm::GlobalVariable( 4876 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 4877 llvm::GlobalValue::PrivateLinkage, 4878 llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id"); 4879 4880 // Register the information for the entry associated with this target region. 4881 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 4882 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 4883 /*Flags=*/0); 4884 } 4885 4886 /// discard all CompoundStmts intervening between two constructs 4887 static const Stmt *ignoreCompoundStmts(const Stmt *Body) { 4888 while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body)) 4889 Body = CS->body_front(); 4890 4891 return Body; 4892 } 4893 4894 /// Emit the number of teams for a target directive. Inspect the num_teams 4895 /// clause associated with a teams construct combined or closely nested 4896 /// with the target directive. 4897 /// 4898 /// Emit a team of size one for directives such as 'target parallel' that 4899 /// have no associated teams construct. 4900 /// 4901 /// Otherwise, return nullptr. 4902 static llvm::Value * 4903 emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, 4904 CodeGenFunction &CGF, 4905 const OMPExecutableDirective &D) { 4906 4907 assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " 4908 "teams directive expected to be " 4909 "emitted only for the host!"); 4910 4911 auto &Bld = CGF.Builder; 4912 4913 // If the target directive is combined with a teams directive: 4914 // Return the value in the num_teams clause, if any. 4915 // Otherwise, return 0 to denote the runtime default. 
4916 if (isOpenMPTeamsDirective(D.getDirectiveKind())) { 4917 if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) { 4918 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 4919 auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(), 4920 /*IgnoreResultAssign*/ true); 4921 return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, 4922 /*IsSigned=*/true); 4923 } 4924 4925 // The default value is 0. 4926 return Bld.getInt32(0); 4927 } 4928 4929 // If the target directive is combined with a parallel directive but not a 4930 // teams directive, start one team. 4931 if (isOpenMPParallelDirective(D.getDirectiveKind())) 4932 return Bld.getInt32(1); 4933 4934 // If the current target region has a teams region enclosed, we need to get 4935 // the number of teams to pass to the runtime function call. This is done 4936 // by generating the expression in a inlined region. This is required because 4937 // the expression is captured in the enclosing target environment when the 4938 // teams directive is not combined with target. 4939 4940 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 4941 4942 // FIXME: Accommodate other combined directives with teams when they become 4943 // available. 4944 if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>( 4945 ignoreCompoundStmts(CS.getCapturedStmt()))) { 4946 if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { 4947 CGOpenMPInnerExprInfo CGInfo(CGF, CS); 4948 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 4949 llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); 4950 return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, 4951 /*IsSigned=*/true); 4952 } 4953 4954 // If we have an enclosed teams directive but no num_teams clause we use 4955 // the default value 0. 4956 return Bld.getInt32(0); 4957 } 4958 4959 // No teams associated with the directive. 4960 return nullptr; 4961 } 4962 4963 /// Emit the number of threads for a target directive. 
Inspect the 4964 /// thread_limit clause associated with a teams construct combined or closely 4965 /// nested with the target directive. 4966 /// 4967 /// Emit the num_threads clause for directives such as 'target parallel' that 4968 /// have no associated teams construct. 4969 /// 4970 /// Otherwise, return nullptr. 4971 static llvm::Value * 4972 emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, 4973 CodeGenFunction &CGF, 4974 const OMPExecutableDirective &D) { 4975 4976 assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " 4977 "teams directive expected to be " 4978 "emitted only for the host!"); 4979 4980 auto &Bld = CGF.Builder; 4981 4982 // 4983 // If the target directive is combined with a teams directive: 4984 // Return the value in the thread_limit clause, if any. 4985 // 4986 // If the target directive is combined with a parallel directive: 4987 // Return the value in the num_threads clause, if any. 4988 // 4989 // If both clauses are set, select the minimum of the two. 4990 // 4991 // If neither teams or parallel combined directives set the number of threads 4992 // in a team, return 0 to denote the runtime default. 4993 // 4994 // If this is not a teams directive return nullptr. 
4995 4996 if (isOpenMPTeamsDirective(D.getDirectiveKind()) || 4997 isOpenMPParallelDirective(D.getDirectiveKind())) { 4998 llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0); 4999 llvm::Value *NumThreadsVal = nullptr; 5000 llvm::Value *ThreadLimitVal = nullptr; 5001 5002 if (const auto *ThreadLimitClause = 5003 D.getSingleClause<OMPThreadLimitClause>()) { 5004 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 5005 auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(), 5006 /*IgnoreResultAssign*/ true); 5007 ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, 5008 /*IsSigned=*/true); 5009 } 5010 5011 if (const auto *NumThreadsClause = 5012 D.getSingleClause<OMPNumThreadsClause>()) { 5013 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 5014 llvm::Value *NumThreads = 5015 CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), 5016 /*IgnoreResultAssign*/ true); 5017 NumThreadsVal = 5018 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true); 5019 } 5020 5021 // Select the lesser of thread_limit and num_threads. 5022 if (NumThreadsVal) 5023 ThreadLimitVal = ThreadLimitVal 5024 ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal, 5025 ThreadLimitVal), 5026 NumThreadsVal, ThreadLimitVal) 5027 : NumThreadsVal; 5028 5029 // Set default value passed to the runtime if either teams or a target 5030 // parallel type directive is found but no clause is specified. 5031 if (!ThreadLimitVal) 5032 ThreadLimitVal = DefaultThreadLimitVal; 5033 5034 return ThreadLimitVal; 5035 } 5036 5037 // If the current target region has a teams region enclosed, we need to get 5038 // the thread limit to pass to the runtime function call. This is done 5039 // by generating the expression in a inlined region. This is required because 5040 // the expression is captured in the enclosing target environment when the 5041 // teams directive is not combined with target. 
5042 5043 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 5044 5045 // FIXME: Accommodate other combined directives with teams when they become 5046 // available. 5047 if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>( 5048 ignoreCompoundStmts(CS.getCapturedStmt()))) { 5049 if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { 5050 CGOpenMPInnerExprInfo CGInfo(CGF, CS); 5051 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 5052 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); 5053 return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, 5054 /*IsSigned=*/true); 5055 } 5056 5057 // If we have an enclosed teams directive but no thread_limit clause we use 5058 // the default value 0. 5059 return CGF.Builder.getInt32(0); 5060 } 5061 5062 // No teams associated with the directive. 5063 return nullptr; 5064 } 5065 5066 namespace { 5067 // \brief Utility to handle information from clauses associated with a given 5068 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 5069 // It provides a convenient interface to obtain the information and generate 5070 // code for that information. 5071 class MappableExprsHandler { 5072 public: 5073 /// \brief Values for bit flags used to specify the mapping type for 5074 /// offloading. 5075 enum OpenMPOffloadMappingFlags { 5076 /// \brief Allocate memory on the device and move data from host to device. 5077 OMP_MAP_TO = 0x01, 5078 /// \brief Allocate memory on the device and move data from device to host. 5079 OMP_MAP_FROM = 0x02, 5080 /// \brief Always perform the requested mapping action on the element, even 5081 /// if it was already mapped before. 5082 OMP_MAP_ALWAYS = 0x04, 5083 /// \brief Delete the element from the device environment, ignoring the 5084 /// current reference count associated with the element. 
5085 OMP_MAP_DELETE = 0x08, 5086 /// \brief The element being mapped is a pointer, therefore the pointee 5087 /// should be mapped as well. 5088 OMP_MAP_IS_PTR = 0x10, 5089 /// \brief This flags signals that an argument is the first one relating to 5090 /// a map/private clause expression. For some cases a single 5091 /// map/privatization results in multiple arguments passed to the runtime 5092 /// library. 5093 OMP_MAP_FIRST_REF = 0x20, 5094 /// \brief Signal that the runtime library has to return the device pointer 5095 /// in the current position for the data being mapped. 5096 OMP_MAP_RETURN_PTR = 0x40, 5097 /// \brief This flag signals that the reference being passed is a pointer to 5098 /// private data. 5099 OMP_MAP_PRIVATE_PTR = 0x80, 5100 /// \brief Pass the element to the device by value. 5101 OMP_MAP_PRIVATE_VAL = 0x100, 5102 }; 5103 5104 /// Class that associates information with a base pointer to be passed to the 5105 /// runtime library. 5106 class BasePointerInfo { 5107 /// The base pointer. 5108 llvm::Value *Ptr = nullptr; 5109 /// The base declaration that refers to this device pointer, or null if 5110 /// there is none. 5111 const ValueDecl *DevPtrDecl = nullptr; 5112 5113 public: 5114 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 5115 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 5116 llvm::Value *operator*() const { return Ptr; } 5117 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 5118 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 5119 }; 5120 5121 typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy; 5122 typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy; 5123 typedef SmallVector<unsigned, 16> MapFlagsArrayTy; 5124 5125 private: 5126 /// \brief Directive from where the map clauses were extracted. 5127 const OMPExecutableDirective &CurDir; 5128 5129 /// \brief Function the directive is being generated for. 
5130 CodeGenFunction &CGF; 5131 5132 /// \brief Set of all first private variables in the current directive. 5133 llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls; 5134 5135 /// Map between device pointer declarations and their expression components. 5136 /// The key value for declarations in 'this' is null. 5137 llvm::DenseMap< 5138 const ValueDecl *, 5139 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 5140 DevPointersMap; 5141 5142 llvm::Value *getExprTypeSize(const Expr *E) const { 5143 auto ExprTy = E->getType().getCanonicalType(); 5144 5145 // Reference types are ignored for mapping purposes. 5146 if (auto *RefTy = ExprTy->getAs<ReferenceType>()) 5147 ExprTy = RefTy->getPointeeType().getCanonicalType(); 5148 5149 // Given that an array section is considered a built-in type, we need to 5150 // do the calculation based on the length of the section instead of relying 5151 // on CGF.getTypeSize(E->getType()). 5152 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 5153 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 5154 OAE->getBase()->IgnoreParenImpCasts()) 5155 .getCanonicalType(); 5156 5157 // If there is no length associated with the expression, that means we 5158 // are using the whole length of the base. 5159 if (!OAE->getLength() && OAE->getColonLoc().isValid()) 5160 return CGF.getTypeSize(BaseTy); 5161 5162 llvm::Value *ElemSize; 5163 if (auto *PTy = BaseTy->getAs<PointerType>()) 5164 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 5165 else { 5166 auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 5167 assert(ATy && "Expecting array type if not a pointer type."); 5168 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 5169 } 5170 5171 // If we don't have a length at this point, that is because we have an 5172 // array section with a single element. 
5173 if (!OAE->getLength()) 5174 return ElemSize; 5175 5176 auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); 5177 LengthVal = 5178 CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); 5179 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 5180 } 5181 return CGF.getTypeSize(ExprTy); 5182 } 5183 5184 /// \brief Return the corresponding bits for a given map clause modifier. Add 5185 /// a flag marking the map as a pointer if requested. Add a flag marking the 5186 /// map as the first one of a series of maps that relate to the same map 5187 /// expression. 5188 unsigned getMapTypeBits(OpenMPMapClauseKind MapType, 5189 OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag, 5190 bool AddIsFirstFlag) const { 5191 unsigned Bits = 0u; 5192 switch (MapType) { 5193 case OMPC_MAP_alloc: 5194 case OMPC_MAP_release: 5195 // alloc and release is the default behavior in the runtime library, i.e. 5196 // if we don't pass any bits alloc/release that is what the runtime is 5197 // going to do. Therefore, we don't need to signal anything for these two 5198 // type modifiers. 5199 break; 5200 case OMPC_MAP_to: 5201 Bits = OMP_MAP_TO; 5202 break; 5203 case OMPC_MAP_from: 5204 Bits = OMP_MAP_FROM; 5205 break; 5206 case OMPC_MAP_tofrom: 5207 Bits = OMP_MAP_TO | OMP_MAP_FROM; 5208 break; 5209 case OMPC_MAP_delete: 5210 Bits = OMP_MAP_DELETE; 5211 break; 5212 default: 5213 llvm_unreachable("Unexpected map type!"); 5214 break; 5215 } 5216 if (AddPtrFlag) 5217 Bits |= OMP_MAP_IS_PTR; 5218 if (AddIsFirstFlag) 5219 Bits |= OMP_MAP_FIRST_REF; 5220 if (MapTypeModifier == OMPC_MAP_always) 5221 Bits |= OMP_MAP_ALWAYS; 5222 return Bits; 5223 } 5224 5225 /// \brief Return true if the provided expression is a final array section. A 5226 /// final array section, is one whose length can't be proved to be one. 
5227 bool isFinalArraySectionExpression(const Expr *E) const { 5228 auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 5229 5230 // It is not an array section and therefore not a unity-size one. 5231 if (!OASE) 5232 return false; 5233 5234 // An array section with no colon always refer to a single element. 5235 if (OASE->getColonLoc().isInvalid()) 5236 return false; 5237 5238 auto *Length = OASE->getLength(); 5239 5240 // If we don't have a length we have to check if the array has size 1 5241 // for this dimension. Also, we should always expect a length if the 5242 // base type is pointer. 5243 if (!Length) { 5244 auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 5245 OASE->getBase()->IgnoreParenImpCasts()) 5246 .getCanonicalType(); 5247 if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 5248 return ATy->getSize().getSExtValue() != 1; 5249 // If we don't have a constant dimension length, we have to consider 5250 // the current section as having any size, so it is not necessarily 5251 // unitary. If it happen to be unity size, that's user fault. 5252 return true; 5253 } 5254 5255 // Check if the length evaluates to 1. 5256 llvm::APSInt ConstLength; 5257 if (!Length->EvaluateAsInt(ConstLength, CGF.getContext())) 5258 return true; // Can have more that size 1. 5259 5260 return ConstLength.getSExtValue() != 1; 5261 } 5262 5263 /// \brief Generate the base pointers, section pointers, sizes and map type 5264 /// bits for the provided map type, map modifier, and expression components. 5265 /// \a IsFirstComponent should be set to true if the provided set of 5266 /// components is the first associated with a capture. 
void generateInfoForComponentList(
    OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
    MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
    MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
    bool IsFirstComponentList) const {

  // One entry is appended to each of BasePointers, Pointers, Sizes and Types
  // for every piece of map information generated here; the four arrays are
  // therefore extended in lockstep.
  //
  // The following summarizes what has to be generated for each map and the
  // types below. The generated information is expressed in this order:
  // base pointer, section pointer, size, flags
  // (to add to the ones that come from the map type and modifier).
  //
  // double d;
  // int i[100];
  // float *p;
  //
  // struct S1 {
  //   int i;
  //   float f[50];
  // }
  // struct S2 {
  //   int i;
  //   float f[50];
  //   S1 s;
  //   double *p;
  //   struct S2 *ps;
  // }
  // S2 s;
  // S2 *ps;
  //
  // map(d)
  // &d, &d, sizeof(double), noflags
  //
  // map(i)
  // &i, &i, 100*sizeof(int), noflags
  //
  // map(i[1:23])
  // &i(=&i[0]), &i[1], 23*sizeof(int), noflags
  //
  // map(p)
  // &p, &p, sizeof(float*), noflags
  //
  // map(p[1:24])
  // p, &p[1], 24*sizeof(float), noflags
  //
  // map(s)
  // &s, &s, sizeof(S2), noflags
  //
  // map(s.i)
  // &s, &(s.i), sizeof(int), noflags
  //
  // map(s.s.f)
  // &s, &(s.s.f), 50*sizeof(float), noflags
  //
  // map(s.p)
  // &s, &(s.p), sizeof(double*), noflags
  //
  // map(s.p[:22])
  // &s, &(s.p), sizeof(double*), noflags
  // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag
  //
  // map(s.ps)
  // &s, &(s.ps), sizeof(S2*), noflags
  //
  // map(s.ps->s.i)
  // &s, &(s.ps), sizeof(S2*), noflags
  // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag
  //
  // map(s.ps->ps)
  // &s, &(s.ps), sizeof(S2*), noflags
  // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
  //
  // map(s.ps->ps->ps)
  // &s, &(s.ps), sizeof(S2*), noflags
  // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
  // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
  //
  // map(s.ps->ps->s.f[:22])
  // &s, &(s.ps), sizeof(S2*), noflags
  // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
  // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag
  //
  // map(ps)
  // &ps, &ps, sizeof(S2*), noflags
  //
  // map(ps->i)
  // ps, &(ps->i), sizeof(int), noflags
  //
  // map(ps->s.f)
  // ps, &(ps->s.f[0]), 50*sizeof(float), noflags
  //
  // map(ps->p)
  // ps, &(ps->p), sizeof(double*), noflags
  //
  // map(ps->p[:22])
  // ps, &(ps->p), sizeof(double*), noflags
  // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag
  //
  // map(ps->ps)
  // ps, &(ps->ps), sizeof(S2*), noflags
  //
  // map(ps->ps->s.i)
  // ps, &(ps->ps), sizeof(S2*), noflags
  // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag
  //
  // map(ps->ps->ps)
  // ps, &(ps->ps), sizeof(S2*), noflags
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
  //
  // map(ps->ps->ps->ps)
  // ps, &(ps->ps), sizeof(S2*), noflags
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
  // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
  //
  // map(ps->ps->ps->s.f[:22])
  // ps, &(ps->ps), sizeof(S2*), noflags
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
  // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag +
  // extra_flag

  // Track if the map information being generated is the first for a capture.
  bool IsCaptureFirstInfo = IsFirstComponentList;

  // Scan the components from the base to the complete expression. The
  // component list is walked with reverse iterators, so the first component
  // visited is the base of the expression.
  auto CI = Components.rbegin();
  auto CE = Components.rend();
  auto I = CI;

  // Track if the map information being generated is the first for a list of
  // components.
  bool IsExpressionFirstInfo = true;
  // Base pointer to be recorded with the next entry.
  llvm::Value *BP = nullptr;

  if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) {
    // The base is the 'this' pointer. The content of the pointer is going
    // to be the base of the field being mapped.
    BP = CGF.EmitScalarExpr(ME->getBase());
  } else {
    // The base is the reference to the variable.
    // BP = &Var.
    BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression()))
             .getPointer();

    // If the variable is a pointer and is being dereferenced (i.e. is not
    // the last component), the base has to be the pointer itself, not its
    // reference. References are ignored for mapping purposes.
    QualType Ty =
        I->getAssociatedDeclaration()->getType().getNonReferenceType();
    if (Ty->isAnyPointerType() && std::next(I) != CE) {
      auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty);
      BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(),
                                       Ty->castAs<PointerType>())
               .getPointer();

      // We do not need to generate individual map information for the
      // pointer, it can be associated with the combined storage.
      ++I;
    }
  }

  for (; I != CE; ++I) {
    auto Next = std::next(I);

    // We need to generate the addresses and sizes if this is the last
    // component, if the component is a pointer or if it is an array section
    // whose length can't be proved to be one. If this is a pointer, it
    // becomes the base address for the following components.

    // A final array section is one whose length can't be proved to be one.
    bool IsFinalArraySection =
        isFinalArraySectionExpression(I->getAssociatedExpression());

    // Get information on whether the element is a pointer. Have to do a
    // special treatment for array sections given that they are built-in
    // types.
    const auto *OASE =
        dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
    bool IsPointer =
        (OASE &&
         OMPArraySectionExpr::getBaseOriginalType(OASE)
             .getCanonicalType()
             ->isAnyPointerType()) ||
        I->getAssociatedExpression()->getType()->isAnyPointerType();

    // Components that satisfy none of the three conditions produce no entry
    // and are simply skipped.
    if (Next == CE || IsPointer || IsFinalArraySection) {

      // If this is not the last component, we expect the pointer to be
      // associated with an array expression or member expression.
      assert((Next == CE ||
              isa<MemberExpr>(Next->getAssociatedExpression()) ||
              isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
              isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
             "Unexpected expression");

      // LB is the section pointer (address of the component); Size is the
      // size of the storage it designates.
      auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer();
      auto *Size = getExprTypeSize(I->getAssociatedExpression());

      // If we have a member expression and the current component is a
      // reference, we have to map the reference too. Whenever we have a
      // reference, the section that reference refers to is going to be a
      // load instruction from the storage assigned to the reference.
      if (isa<MemberExpr>(I->getAssociatedExpression()) &&
          I->getAssociatedDeclaration()->getType()->isReferenceType()) {
        // The cast asserts that LB really is a load instruction; its pointer
        // operand is the storage of the reference itself.
        auto *LI = cast<llvm::LoadInst>(LB);
        auto *RefAddr = LI->getPointerOperand();

        // Extra entry for the reference: 'alloc' map type, pointer-sized.
        BasePointers.push_back(BP);
        Pointers.push_back(RefAddr);
        Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
        Types.push_back(getMapTypeBits(
            /*MapType*/ OMPC_MAP_alloc, /*MapTypeModifier=*/OMPC_MAP_unknown,
            !IsExpressionFirstInfo, IsCaptureFirstInfo));
        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        // The reference will be the next base address.
        BP = RefAddr;
      }

      BasePointers.push_back(BP);
      Pointers.push_back(LB);
      Sizes.push_back(Size);

      // We need to add a pointer flag for each map that comes from the
      // same expression except for the first one. We also need to signal
      // this map is the first one that relates with the current capture
      // (there is a set of entries for each capture).
      Types.push_back(getMapTypeBits(MapType, MapTypeModifier,
                                     !IsExpressionFirstInfo,
                                     IsCaptureFirstInfo));

      // If we have a final array section, we are done with this expression.
      if (IsFinalArraySection)
        break;

      // The pointer becomes the base for the next element.
      if (Next != CE)
        BP = LB;

      IsExpressionFirstInfo = false;
      IsCaptureFirstInfo = false;
      continue;
    }
  }
}

/// \brief Return the adjusted map modifiers if the declaration a capture
/// refers to appears in a first-private clause. This is expected to be used
/// only with directives that start with 'target'.
///
/// \param Cap              Capture to inspect; must capture a variable.
/// \param CurrentModifiers Flags returned unchanged when the captured
///                         variable is not in the first-private set.
unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap,
                                             unsigned CurrentModifiers) {
  assert(Cap.capturesVariable() && "Expected capture by reference only!");

  // A first private variable captured by reference will use only the
  // 'private ptr' and 'map to' flag. Return the right flags if the captured
  // declaration is known as first-private in this handler.
  // NOTE(review): the constructor stores canonical declarations in
  // FirstPrivateDecls, while this lookup uses Cap.getCapturedVar() as is -
  // confirm the captured variable is always the canonical declaration.
  if (FirstPrivateDecls.count(Cap.getCapturedVar()))
    return MappableExprsHandler::OMP_MAP_PRIVATE_PTR |
           MappableExprsHandler::OMP_MAP_TO;

  // We didn't modify anything.
  return CurrentModifiers;
}

public:
/// \brief Build a handler for directive \p Dir being emitted in \p CGF.
///
/// Records the canonical declarations named in the directive's firstprivate
/// clauses and the component lists of its is_device_ptr clauses, keyed by
/// declaration.
MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
    : CurDir(Dir), CGF(CGF) {
  // Extract firstprivate clause information.
  for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
    for (const auto *D : C->varlists())
      FirstPrivateDecls.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  // Extract device pointer clause information.
  for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
    for (auto L : C->component_lists())
      DevPointersMap[L.first].push_back(L.second);
}

/// \brief Generate all the base pointers, section pointers, sizes and map
/// types for the extracted mappable expressions. Also, for each item that
/// relates with a device pointer, a pair of the relevant declaration and
/// index where it occurs is appended to the device pointers info array.
void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                     MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                     MapFlagsArrayTy &Types) const {
  BasePointers.clear();
  Pointers.clear();
  Sizes.clear();
  Types.clear();

  struct MapInfo {
    /// Kind that defines how a device pointer has to be returned.
    enum ReturnPointerKind {
      // Don't have to return any pointer.
      RPK_None,
      // Pointer is the base of the declaration.
5562 RPK_Base, 5563 // Pointer is a member of the base declaration - 'this' 5564 RPK_Member, 5565 // Pointer is a reference and a member of the base declaration - 'this' 5566 RPK_MemberReference, 5567 }; 5568 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 5569 OpenMPMapClauseKind MapType; 5570 OpenMPMapClauseKind MapTypeModifier; 5571 ReturnPointerKind ReturnDevicePointer; 5572 5573 MapInfo() 5574 : MapType(OMPC_MAP_unknown), MapTypeModifier(OMPC_MAP_unknown), 5575 ReturnDevicePointer(RPK_None) {} 5576 MapInfo( 5577 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 5578 OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, 5579 ReturnPointerKind ReturnDevicePointer) 5580 : Components(Components), MapType(MapType), 5581 MapTypeModifier(MapTypeModifier), 5582 ReturnDevicePointer(ReturnDevicePointer) {} 5583 }; 5584 5585 // We have to process the component lists that relate with the same 5586 // declaration in a single chunk so that we can generate the map flags 5587 // correctly. Therefore, we organize all lists in a map. 5588 llvm::DenseMap<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 5589 5590 // Helper function to fill the information map for the different supported 5591 // clauses. 5592 auto &&InfoGen = [&Info]( 5593 const ValueDecl *D, 5594 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 5595 OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier, 5596 MapInfo::ReturnPointerKind ReturnDevicePointer) { 5597 const ValueDecl *VD = 5598 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 5599 Info[VD].push_back({L, MapType, MapModifier, ReturnDevicePointer}); 5600 }; 5601 5602 // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 
5603 for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) 5604 for (auto L : C->component_lists()) 5605 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(), 5606 MapInfo::RPK_None); 5607 for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>()) 5608 for (auto L : C->component_lists()) 5609 InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown, 5610 MapInfo::RPK_None); 5611 for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>()) 5612 for (auto L : C->component_lists()) 5613 InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown, 5614 MapInfo::RPK_None); 5615 5616 // Look at the use_device_ptr clause information and mark the existing map 5617 // entries as such. If there is no map information for an entry in the 5618 // use_device_ptr list, we create one with map type 'alloc' and zero size 5619 // section. It is the user fault if that was not mapped before. 5620 // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 5621 for (auto *C : this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) 5622 for (auto L : C->component_lists()) { 5623 assert(!L.second.empty() && "Not expecting empty list of components!"); 5624 const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); 5625 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 5626 auto *IE = L.second.back().getAssociatedExpression(); 5627 // If the first component is a member expression, we have to look into 5628 // 'this', which maps to null in the map of map information. Otherwise 5629 // look directly for the information. 5630 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 5631 5632 // We potentially have map information for this declaration already. 5633 // Look for the first set of components that refer to it. 
5634 if (It != Info.end()) { 5635 auto CI = std::find_if( 5636 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { 5637 return MI.Components.back().getAssociatedDeclaration() == VD; 5638 }); 5639 // If we found a map entry, signal that the pointer has to be returned 5640 // and move on to the next declaration. 5641 if (CI != It->second.end()) { 5642 CI->ReturnDevicePointer = isa<MemberExpr>(IE) 5643 ? (VD->getType()->isReferenceType() 5644 ? MapInfo::RPK_MemberReference 5645 : MapInfo::RPK_Member) 5646 : MapInfo::RPK_Base; 5647 continue; 5648 } 5649 } 5650 5651 // We didn't find any match in our map information - generate a zero 5652 // size array section. 5653 // FIXME: MSVC 2013 seems to require this-> to find member CGF. 5654 llvm::Value *Ptr = 5655 this->CGF 5656 .EmitLoadOfLValue(this->CGF.EmitLValue(IE), SourceLocation()) 5657 .getScalarVal(); 5658 BasePointers.push_back({Ptr, VD}); 5659 Pointers.push_back(Ptr); 5660 Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy)); 5661 Types.push_back(OMP_MAP_RETURN_PTR | OMP_MAP_FIRST_REF); 5662 } 5663 5664 for (auto &M : Info) { 5665 // We need to know when we generate information for the first component 5666 // associated with a capture, because the mapping flags depend on it. 5667 bool IsFirstComponentList = true; 5668 for (MapInfo &L : M.second) { 5669 assert(!L.Components.empty() && 5670 "Not expecting declaration with no component lists."); 5671 5672 // Remember the current base pointer index. 5673 unsigned CurrentBasePointersIdx = BasePointers.size(); 5674 // FIXME: MSVC 2013 seems to require this-> to find the member method. 5675 this->generateInfoForComponentList(L.MapType, L.MapTypeModifier, 5676 L.Components, BasePointers, Pointers, 5677 Sizes, Types, IsFirstComponentList); 5678 5679 // If this entry relates with a device pointer, set the relevant 5680 // declaration and add the 'return pointer' flag. 
5681 if (IsFirstComponentList && 5682 L.ReturnDevicePointer != MapInfo::RPK_None) { 5683 // If the pointer is not the base of the map, we need to skip the 5684 // base. If it is a reference in a member field, we also need to skip 5685 // the map of the reference. 5686 if (L.ReturnDevicePointer != MapInfo::RPK_Base) { 5687 ++CurrentBasePointersIdx; 5688 if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference) 5689 ++CurrentBasePointersIdx; 5690 } 5691 assert(BasePointers.size() > CurrentBasePointersIdx && 5692 "Unexpected number of mapped base pointers."); 5693 5694 auto *RelevantVD = L.Components.back().getAssociatedDeclaration(); 5695 assert(RelevantVD && 5696 "No relevant declaration related with device pointer??"); 5697 5698 BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); 5699 Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PTR; 5700 } 5701 IsFirstComponentList = false; 5702 } 5703 } 5704 } 5705 5706 /// \brief Generate the base pointers, section pointers, sizes and map types 5707 /// associated to a given capture. 5708 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 5709 llvm::Value *Arg, 5710 MapBaseValuesArrayTy &BasePointers, 5711 MapValuesArrayTy &Pointers, 5712 MapValuesArrayTy &Sizes, 5713 MapFlagsArrayTy &Types) const { 5714 assert(!Cap->capturesVariableArrayType() && 5715 "Not expecting to generate map info for a variable array type!"); 5716 5717 BasePointers.clear(); 5718 Pointers.clear(); 5719 Sizes.clear(); 5720 Types.clear(); 5721 5722 // We need to know when we generating information for the first component 5723 // associated with a capture, because the mapping flags depend on it. 5724 bool IsFirstComponentList = true; 5725 5726 const ValueDecl *VD = 5727 Cap->capturesThis() 5728 ? nullptr 5729 : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl()); 5730 5731 // If this declaration appears in a is_device_ptr clause we just have to 5732 // pass the pointer by value. 
If it is a reference to a declaration, we just 5733 // pass its value, otherwise, if it is a member expression, we need to map 5734 // 'to' the field. 5735 if (!VD) { 5736 auto It = DevPointersMap.find(VD); 5737 if (It != DevPointersMap.end()) { 5738 for (auto L : It->second) { 5739 generateInfoForComponentList( 5740 /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L, 5741 BasePointers, Pointers, Sizes, Types, IsFirstComponentList); 5742 IsFirstComponentList = false; 5743 } 5744 return; 5745 } 5746 } else if (DevPointersMap.count(VD)) { 5747 BasePointers.push_back({Arg, VD}); 5748 Pointers.push_back(Arg); 5749 Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); 5750 Types.push_back(OMP_MAP_PRIVATE_VAL | OMP_MAP_FIRST_REF); 5751 return; 5752 } 5753 5754 // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 5755 for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) 5756 for (auto L : C->decl_component_lists(VD)) { 5757 assert(L.first == VD && 5758 "We got information for the wrong declaration??"); 5759 assert(!L.second.empty() && 5760 "Not expecting declaration with no component lists."); 5761 generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(), 5762 L.second, BasePointers, Pointers, Sizes, 5763 Types, IsFirstComponentList); 5764 IsFirstComponentList = false; 5765 } 5766 5767 return; 5768 } 5769 5770 /// \brief Generate the default map information for a given capture \a CI, 5771 /// record field declaration \a RI and captured value \a CV. 5772 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 5773 const FieldDecl &RI, llvm::Value *CV, 5774 MapBaseValuesArrayTy &CurBasePointers, 5775 MapValuesArrayTy &CurPointers, 5776 MapValuesArrayTy &CurSizes, 5777 MapFlagsArrayTy &CurMapTypes) { 5778 5779 // Do the default mapping. 
5780 if (CI.capturesThis()) { 5781 CurBasePointers.push_back(CV); 5782 CurPointers.push_back(CV); 5783 const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 5784 CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType())); 5785 // Default map type. 5786 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 5787 } else if (CI.capturesVariableByCopy()) { 5788 CurBasePointers.push_back(CV); 5789 CurPointers.push_back(CV); 5790 if (!RI.getType()->isAnyPointerType()) { 5791 // We have to signal to the runtime captures passed by value that are 5792 // not pointers. 5793 CurMapTypes.push_back(OMP_MAP_PRIVATE_VAL); 5794 CurSizes.push_back(CGF.getTypeSize(RI.getType())); 5795 } else { 5796 // Pointers are implicitly mapped with a zero size and no flags 5797 // (other than first map that is added for all implicit maps). 5798 CurMapTypes.push_back(0u); 5799 CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy)); 5800 } 5801 } else { 5802 assert(CI.capturesVariable() && "Expected captured reference."); 5803 CurBasePointers.push_back(CV); 5804 CurPointers.push_back(CV); 5805 5806 const ReferenceType *PtrTy = 5807 cast<ReferenceType>(RI.getType().getTypePtr()); 5808 QualType ElementType = PtrTy->getPointeeType(); 5809 CurSizes.push_back(CGF.getTypeSize(ElementType)); 5810 // The default map type for a scalar/complex type is 'to' because by 5811 // default the value doesn't have to be retrieved. For an aggregate 5812 // type, the default is 'tofrom'. 5813 CurMapTypes.push_back(ElementType->isAggregateType() 5814 ? (OMP_MAP_TO | OMP_MAP_FROM) 5815 : OMP_MAP_TO); 5816 5817 // If we have a capture by reference we may need to add the private 5818 // pointer flag if the base declaration shows in some first-private 5819 // clause. 5820 CurMapTypes.back() = 5821 adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back()); 5822 } 5823 // Every default map produces a single argument, so, it is always the 5824 // first one. 
5825 CurMapTypes.back() |= OMP_MAP_FIRST_REF; 5826 } 5827 }; 5828 5829 enum OpenMPOffloadingReservedDeviceIDs { 5830 /// \brief Device ID if the device was not defined, runtime should get it 5831 /// from environment variables in the spec. 5832 OMP_DEVICEID_UNDEF = -1, 5833 }; 5834 } // anonymous namespace 5835 5836 /// \brief Emit the arrays used to pass the captures and map information to the 5837 /// offloading runtime library. If there is no map or capture information, 5838 /// return nullptr by reference. 5839 static void 5840 emitOffloadingArrays(CodeGenFunction &CGF, 5841 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 5842 MappableExprsHandler::MapValuesArrayTy &Pointers, 5843 MappableExprsHandler::MapValuesArrayTy &Sizes, 5844 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 5845 CGOpenMPRuntime::TargetDataInfo &Info) { 5846 auto &CGM = CGF.CGM; 5847 auto &Ctx = CGF.getContext(); 5848 5849 // Reset the array information. 5850 Info.clearArrayInfo(); 5851 Info.NumberOfPtrs = BasePointers.size(); 5852 5853 if (Info.NumberOfPtrs) { 5854 // Detect if we have any capture size requiring runtime evaluation of the 5855 // size so that a constant array could be eventually used. 
5856 bool hasRuntimeEvaluationCaptureSize = false; 5857 for (auto *S : Sizes) 5858 if (!isa<llvm::Constant>(S)) { 5859 hasRuntimeEvaluationCaptureSize = true; 5860 break; 5861 } 5862 5863 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 5864 QualType PointerArrayType = 5865 Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, 5866 /*IndexTypeQuals=*/0); 5867 5868 Info.BasePointersArray = 5869 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 5870 Info.PointersArray = 5871 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 5872 5873 // If we don't have any VLA types or other types that require runtime 5874 // evaluation, we can use a constant array for the map sizes, otherwise we 5875 // need to fill up the arrays as we do for the pointers. 5876 if (hasRuntimeEvaluationCaptureSize) { 5877 QualType SizeArrayType = Ctx.getConstantArrayType( 5878 Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, 5879 /*IndexTypeQuals=*/0); 5880 Info.SizesArray = 5881 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 5882 } else { 5883 // We expect all the sizes to be constant, so we collect them to create 5884 // a constant array. 5885 SmallVector<llvm::Constant *, 16> ConstSizes; 5886 for (auto S : Sizes) 5887 ConstSizes.push_back(cast<llvm::Constant>(S)); 5888 5889 auto *SizesArrayInit = llvm::ConstantArray::get( 5890 llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); 5891 auto *SizesArrayGbl = new llvm::GlobalVariable( 5892 CGM.getModule(), SizesArrayInit->getType(), 5893 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 5894 SizesArrayInit, ".offload_sizes"); 5895 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 5896 Info.SizesArray = SizesArrayGbl; 5897 } 5898 5899 // The map types are always constant so we don't need to generate code to 5900 // fill arrays. Instead, we create an array constant. 
5901 llvm::Constant *MapTypesArrayInit = 5902 llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); 5903 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 5904 CGM.getModule(), MapTypesArrayInit->getType(), 5905 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 5906 MapTypesArrayInit, ".offload_maptypes"); 5907 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 5908 Info.MapTypesArray = MapTypesArrayGbl; 5909 5910 for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) { 5911 llvm::Value *BPVal = *BasePointers[i]; 5912 if (BPVal->getType()->isPointerTy()) 5913 BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy); 5914 else { 5915 assert(BPVal->getType()->isIntegerTy() && 5916 "If not a pointer, the value type must be an integer."); 5917 BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy); 5918 } 5919 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 5920 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 5921 Info.BasePointersArray, 0, i); 5922 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 5923 CGF.Builder.CreateStore(BPVal, BPAddr); 5924 5925 if (Info.requiresDevicePointerInfo()) 5926 if (auto *DevVD = BasePointers[i].getDevicePtrDecl()) 5927 Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr)); 5928 5929 llvm::Value *PVal = Pointers[i]; 5930 if (PVal->getType()->isPointerTy()) 5931 PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy); 5932 else { 5933 assert(PVal->getType()->isIntegerTy() && 5934 "If not a pointer, the value type must be an integer."); 5935 PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy); 5936 } 5937 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 5938 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 5939 Info.PointersArray, 0, i); 5940 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 5941 CGF.Builder.CreateStore(PVal, PAddr); 5942 5943 if (hasRuntimeEvaluationCaptureSize) { 5944 llvm::Value *S = 
CGF.Builder.CreateConstInBoundsGEP2_32( 5945 llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), 5946 Info.SizesArray, 5947 /*Idx0=*/0, 5948 /*Idx1=*/i); 5949 Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); 5950 CGF.Builder.CreateStore( 5951 CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true), 5952 SAddr); 5953 } 5954 } 5955 } 5956 } 5957 /// \brief Emit the arguments to be passed to the runtime library based on the 5958 /// arrays of pointers, sizes and map types. 5959 static void emitOffloadingArraysArgument( 5960 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 5961 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 5962 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 5963 auto &CGM = CGF.CGM; 5964 if (Info.NumberOfPtrs) { 5965 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5966 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 5967 Info.BasePointersArray, 5968 /*Idx0=*/0, /*Idx1=*/0); 5969 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5970 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 5971 Info.PointersArray, 5972 /*Idx0=*/0, 5973 /*Idx1=*/0); 5974 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5975 llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray, 5976 /*Idx0=*/0, /*Idx1=*/0); 5977 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5978 llvm::ArrayType::get(CGM.Int32Ty, Info.NumberOfPtrs), 5979 Info.MapTypesArray, 5980 /*Idx0=*/0, 5981 /*Idx1=*/0); 5982 } else { 5983 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 5984 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 5985 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); 5986 MapTypesArrayArg = 5987 llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()); 5988 } 5989 } 5990 5991 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, 5992 const OMPExecutableDirective &D, 5993 
llvm::Value *OutlinedFn, 5994 llvm::Value *OutlinedFnID, 5995 const Expr *IfCond, const Expr *Device, 5996 ArrayRef<llvm::Value *> CapturedVars) { 5997 if (!CGF.HaveInsertPoint()) 5998 return; 5999 6000 assert(OutlinedFn && "Invalid outlined function!"); 6001 6002 auto &Ctx = CGF.getContext(); 6003 6004 // Fill up the arrays with all the captured variables. 6005 MappableExprsHandler::MapValuesArrayTy KernelArgs; 6006 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 6007 MappableExprsHandler::MapValuesArrayTy Pointers; 6008 MappableExprsHandler::MapValuesArrayTy Sizes; 6009 MappableExprsHandler::MapFlagsArrayTy MapTypes; 6010 6011 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 6012 MappableExprsHandler::MapValuesArrayTy CurPointers; 6013 MappableExprsHandler::MapValuesArrayTy CurSizes; 6014 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 6015 6016 // Get mappable expression information. 6017 MappableExprsHandler MEHandler(D, CGF); 6018 6019 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 6020 auto RI = CS.getCapturedRecordDecl()->field_begin(); 6021 auto CV = CapturedVars.begin(); 6022 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 6023 CE = CS.capture_end(); 6024 CI != CE; ++CI, ++RI, ++CV) { 6025 StringRef Name; 6026 QualType Ty; 6027 6028 CurBasePointers.clear(); 6029 CurPointers.clear(); 6030 CurSizes.clear(); 6031 CurMapTypes.clear(); 6032 6033 // VLA sizes are passed to the outlined region by copy and do not have map 6034 // information associated. 6035 if (CI->capturesVariableArrayType()) { 6036 CurBasePointers.push_back(*CV); 6037 CurPointers.push_back(*CV); 6038 CurSizes.push_back(CGF.getTypeSize(RI->getType())); 6039 // Copy to the device as an argument. No need to retrieve it. 
6040 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL | 6041 MappableExprsHandler::OMP_MAP_FIRST_REF); 6042 } else { 6043 // If we have any information in the map clause, we use it, otherwise we 6044 // just do a default mapping. 6045 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, 6046 CurSizes, CurMapTypes); 6047 if (CurBasePointers.empty()) 6048 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, 6049 CurPointers, CurSizes, CurMapTypes); 6050 } 6051 // We expect to have at least an element of information for this capture. 6052 assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!"); 6053 assert(CurBasePointers.size() == CurPointers.size() && 6054 CurBasePointers.size() == CurSizes.size() && 6055 CurBasePointers.size() == CurMapTypes.size() && 6056 "Inconsistent map information sizes!"); 6057 6058 // The kernel args are always the first elements of the base pointers 6059 // associated with a capture. 6060 KernelArgs.push_back(*CurBasePointers.front()); 6061 // We need to append the results of this capture to what we already have. 6062 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 6063 Pointers.append(CurPointers.begin(), CurPointers.end()); 6064 Sizes.append(CurSizes.begin(), CurSizes.end()); 6065 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); 6066 } 6067 6068 // Keep track on whether the host function has to be executed. 6069 auto OffloadErrorQType = 6070 Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true); 6071 auto OffloadError = CGF.MakeAddrLValue( 6072 CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"), 6073 OffloadErrorQType); 6074 CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), 6075 OffloadError); 6076 6077 // Fill up the pointer arrays and transfer execution to the device. 
6078 auto &&ThenGen = [&BasePointers, &Pointers, &Sizes, &MapTypes, Device, 6079 OutlinedFnID, OffloadError, 6080 &D](CodeGenFunction &CGF, PrePostActionTy &) { 6081 auto &RT = CGF.CGM.getOpenMPRuntime(); 6082 // Emit the offloading arrays. 6083 TargetDataInfo Info; 6084 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 6085 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 6086 Info.PointersArray, Info.SizesArray, 6087 Info.MapTypesArray, Info); 6088 6089 // On top of the arrays that were filled up, the target offloading call 6090 // takes as arguments the device id as well as the host pointer. The host 6091 // pointer is used by the runtime library to identify the current target 6092 // region, so it only has to be unique and not necessarily point to 6093 // anything. It could be the pointer to the outlined function that 6094 // implements the target region, but we aren't using that so that the 6095 // compiler doesn't need to keep that, and could therefore inline the host 6096 // function if proven worthwhile during optimization. 6097 6098 // From this point on, we need to have an ID of the target region defined. 6099 assert(OutlinedFnID && "Invalid outlined function ID!"); 6100 6101 // Emit device ID if any. 6102 llvm::Value *DeviceID; 6103 if (Device) 6104 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 6105 CGF.Int32Ty, /*isSigned=*/true); 6106 else 6107 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 6108 6109 // Emit the number of elements in the offloading arrays. 6110 llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size()); 6111 6112 // Return value of the runtime offloading call. 6113 llvm::Value *Return; 6114 6115 auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D); 6116 auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D); 6117 6118 // The target region is an outlined function launched by the runtime 6119 // via calls __tgt_target() or __tgt_target_teams(). 
6120 // 6121 // __tgt_target() launches a target region with one team and one thread, 6122 // executing a serial region. This master thread may in turn launch 6123 // more threads within its team upon encountering a parallel region, 6124 // however, no additional teams can be launched on the device. 6125 // 6126 // __tgt_target_teams() launches a target region with one or more teams, 6127 // each with one or more threads. This call is required for target 6128 // constructs such as: 6129 // 'target teams' 6130 // 'target' / 'teams' 6131 // 'target teams distribute parallel for' 6132 // 'target parallel' 6133 // and so on. 6134 // 6135 // Note that on the host and CPU targets, the runtime implementation of 6136 // these calls simply call the outlined function without forking threads. 6137 // The outlined functions themselves have runtime calls to 6138 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 6139 // the compiler in emitTeamsCall() and emitParallelCall(). 6140 // 6141 // In contrast, on the NVPTX target, the implementation of 6142 // __tgt_target_teams() launches a GPU kernel with the requested number 6143 // of teams and threads so no additional calls to the runtime are required. 6144 if (NumTeams) { 6145 // If we have NumTeams defined this means that we have an enclosed teams 6146 // region. Therefore we also expect to have NumThreads defined. These two 6147 // values should be defined in the presence of a teams directive, 6148 // regardless of having any clauses associated. If the user is using teams 6149 // but no clauses, these two values will be the default that should be 6150 // passed to the runtime library - a 32-bit integer with the value zero. 
6151 assert(NumThreads && "Thread limit expression should be available along " 6152 "with number of teams."); 6153 llvm::Value *OffloadingArgs[] = { 6154 DeviceID, OutlinedFnID, 6155 PointerNum, Info.BasePointersArray, 6156 Info.PointersArray, Info.SizesArray, 6157 Info.MapTypesArray, NumTeams, 6158 NumThreads}; 6159 Return = CGF.EmitRuntimeCall( 6160 RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); 6161 } else { 6162 llvm::Value *OffloadingArgs[] = { 6163 DeviceID, OutlinedFnID, 6164 PointerNum, Info.BasePointersArray, 6165 Info.PointersArray, Info.SizesArray, 6166 Info.MapTypesArray}; 6167 Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target), 6168 OffloadingArgs); 6169 } 6170 6171 CGF.EmitStoreOfScalar(Return, OffloadError); 6172 }; 6173 6174 // Notify that the host version must be executed. 6175 auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) { 6176 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u), 6177 OffloadError); 6178 }; 6179 6180 // If we have a target function ID it means that we need to support 6181 // offloading, otherwise, just execute on the host. We need to execute on host 6182 // regardless of the conditional in the if clause if, e.g., the user do not 6183 // specify target triples. 6184 if (OutlinedFnID) { 6185 if (IfCond) 6186 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 6187 else { 6188 RegionCodeGenTy ThenRCG(ThenGen); 6189 ThenRCG(CGF); 6190 } 6191 } else { 6192 RegionCodeGenTy ElseRCG(ElseGen); 6193 ElseRCG(CGF); 6194 } 6195 6196 // Check the error code and execute the host version if required. 
6197 auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed"); 6198 auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont"); 6199 auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation()); 6200 auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal); 6201 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 6202 6203 CGF.EmitBlock(OffloadFailedBlock); 6204 CGF.Builder.CreateCall(OutlinedFn, KernelArgs); 6205 CGF.EmitBranch(OffloadContBlock); 6206 6207 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 6208 } 6209 6210 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 6211 StringRef ParentName) { 6212 if (!S) 6213 return; 6214 6215 // Codegen OMP target directives that offload compute to the device. 6216 bool requiresDeviceCodegen = 6217 isa<OMPExecutableDirective>(S) && 6218 isOpenMPTargetExecutionDirective( 6219 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 6220 6221 if (requiresDeviceCodegen) { 6222 auto &E = *cast<OMPExecutableDirective>(S); 6223 unsigned DeviceID; 6224 unsigned FileID; 6225 unsigned Line; 6226 getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID, 6227 FileID, Line); 6228 6229 // Is this a target region that should not be emitted as an entry point? If 6230 // so just signal we are done with this target region. 
6231 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 6232 ParentName, Line)) 6233 return; 6234 6235 switch (S->getStmtClass()) { 6236 case Stmt::OMPTargetDirectiveClass: 6237 CodeGenFunction::EmitOMPTargetDeviceFunction( 6238 CGM, ParentName, cast<OMPTargetDirective>(*S)); 6239 break; 6240 case Stmt::OMPTargetParallelDirectiveClass: 6241 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 6242 CGM, ParentName, cast<OMPTargetParallelDirective>(*S)); 6243 break; 6244 case Stmt::OMPTargetTeamsDirectiveClass: 6245 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 6246 CGM, ParentName, cast<OMPTargetTeamsDirective>(*S)); 6247 break; 6248 default: 6249 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 6250 } 6251 return; 6252 } 6253 6254 if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) { 6255 if (!E->hasAssociatedStmt()) 6256 return; 6257 6258 scanForTargetRegionsFunctions( 6259 cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(), 6260 ParentName); 6261 return; 6262 } 6263 6264 // If this is a lambda function, look into its body. 6265 if (auto *L = dyn_cast<LambdaExpr>(S)) 6266 S = L->getBody(); 6267 6268 // Keep looking for target regions recursively. 6269 for (auto *II : S->children()) 6270 scanForTargetRegionsFunctions(II, ParentName); 6271 } 6272 6273 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 6274 auto &FD = *cast<FunctionDecl>(GD.getDecl()); 6275 6276 // If emitting code for the host, we do not process FD here. Instead we do 6277 // the normal code generation. 6278 if (!CGM.getLangOpts().OpenMPIsDevice) 6279 return false; 6280 6281 // Try to detect target regions in the function. 6282 scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD)); 6283 6284 // We should not emit any function other that the ones created during the 6285 // scanning. Therefore, we signal that this function is completely dealt 6286 // with. 
6287 return true; 6288 } 6289 6290 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 6291 if (!CGM.getLangOpts().OpenMPIsDevice) 6292 return false; 6293 6294 // Check if there are Ctors/Dtors in this declaration and look for target 6295 // regions in it. We use the complete variant to produce the kernel name 6296 // mangling. 6297 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 6298 if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 6299 for (auto *Ctor : RD->ctors()) { 6300 StringRef ParentName = 6301 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 6302 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 6303 } 6304 auto *Dtor = RD->getDestructor(); 6305 if (Dtor) { 6306 StringRef ParentName = 6307 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 6308 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 6309 } 6310 } 6311 6312 // If we are in target mode we do not emit any global (declare target is not 6313 // implemented yet). Therefore we signal that GD was processed in this case. 6314 return true; 6315 } 6316 6317 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 6318 auto *VD = GD.getDecl(); 6319 if (isa<FunctionDecl>(VD)) 6320 return emitTargetFunctions(GD); 6321 6322 return emitTargetGlobalVariable(GD); 6323 } 6324 6325 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { 6326 // If we have offloading in the current module, we need to emit the entries 6327 // now and register the offloading descriptor. 6328 createOffloadEntriesAndInfoMetadata(); 6329 6330 // Create and register the offloading binary descriptors. This is the main 6331 // entity that captures all the information about offloading in the current 6332 // compilation unit. 
6333 return createOffloadingBinaryDescriptorRegistration(); 6334 } 6335 6336 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 6337 const OMPExecutableDirective &D, 6338 SourceLocation Loc, 6339 llvm::Value *OutlinedFn, 6340 ArrayRef<llvm::Value *> CapturedVars) { 6341 if (!CGF.HaveInsertPoint()) 6342 return; 6343 6344 auto *RTLoc = emitUpdateLocation(CGF, Loc); 6345 CodeGenFunction::RunCleanupsScope Scope(CGF); 6346 6347 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 6348 llvm::Value *Args[] = { 6349 RTLoc, 6350 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 6351 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 6352 llvm::SmallVector<llvm::Value *, 16> RealArgs; 6353 RealArgs.append(std::begin(Args), std::end(Args)); 6354 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 6355 6356 auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 6357 CGF.EmitRuntimeCall(RTLFn, RealArgs); 6358 } 6359 6360 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 6361 const Expr *NumTeams, 6362 const Expr *ThreadLimit, 6363 SourceLocation Loc) { 6364 if (!CGF.HaveInsertPoint()) 6365 return; 6366 6367 auto *RTLoc = emitUpdateLocation(CGF, Loc); 6368 6369 llvm::Value *NumTeamsVal = 6370 (NumTeams) 6371 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 6372 CGF.CGM.Int32Ty, /* isSigned = */ true) 6373 : CGF.Builder.getInt32(0); 6374 6375 llvm::Value *ThreadLimitVal = 6376 (ThreadLimit) 6377 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 6378 CGF.CGM.Int32Ty, /* isSigned = */ true) 6379 : CGF.Builder.getInt32(0); 6380 6381 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 6382 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 6383 ThreadLimitVal}; 6384 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 6385 PushNumTeamsArgs); 6386 } 6387 6388 void CGOpenMPRuntime::emitTargetDataCalls( 6389 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 6390 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 6391 if (!CGF.HaveInsertPoint()) 6392 return; 6393 6394 // Action used to replace the default codegen action and turn privatization 6395 // off. 6396 PrePostActionTy NoPrivAction; 6397 6398 // Generate the code for the opening of the data environment. Capture all the 6399 // arguments of the runtime call by reference because they are used in the 6400 // closing of the region. 6401 auto &&BeginThenGen = [&D, Device, &Info, &CodeGen](CodeGenFunction &CGF, 6402 PrePostActionTy &) { 6403 // Fill up the arrays with all the mapped variables. 6404 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 6405 MappableExprsHandler::MapValuesArrayTy Pointers; 6406 MappableExprsHandler::MapValuesArrayTy Sizes; 6407 MappableExprsHandler::MapFlagsArrayTy MapTypes; 6408 6409 // Get map clause information. 6410 MappableExprsHandler MCHandler(D, CGF); 6411 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 6412 6413 // Fill up the arrays and create the arguments. 
6414 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 6415 6416 llvm::Value *BasePointersArrayArg = nullptr; 6417 llvm::Value *PointersArrayArg = nullptr; 6418 llvm::Value *SizesArrayArg = nullptr; 6419 llvm::Value *MapTypesArrayArg = nullptr; 6420 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 6421 SizesArrayArg, MapTypesArrayArg, Info); 6422 6423 // Emit device ID if any. 6424 llvm::Value *DeviceID = nullptr; 6425 if (Device) 6426 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 6427 CGF.Int32Ty, /*isSigned=*/true); 6428 else 6429 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 6430 6431 // Emit the number of elements in the offloading arrays. 6432 auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 6433 6434 llvm::Value *OffloadingArgs[] = { 6435 DeviceID, PointerNum, BasePointersArrayArg, 6436 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 6437 auto &RT = CGF.CGM.getOpenMPRuntime(); 6438 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin), 6439 OffloadingArgs); 6440 6441 // If device pointer privatization is required, emit the body of the region 6442 // here. It will have to be duplicated: with and without privatization. 6443 if (!Info.CaptureDeviceAddrMap.empty()) 6444 CodeGen(CGF); 6445 }; 6446 6447 // Generate code for the closing of the data region. 6448 auto &&EndThenGen = [Device, &Info](CodeGenFunction &CGF, PrePostActionTy &) { 6449 assert(Info.isValid() && "Invalid data environment closing arguments."); 6450 6451 llvm::Value *BasePointersArrayArg = nullptr; 6452 llvm::Value *PointersArrayArg = nullptr; 6453 llvm::Value *SizesArrayArg = nullptr; 6454 llvm::Value *MapTypesArrayArg = nullptr; 6455 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 6456 SizesArrayArg, MapTypesArrayArg, Info); 6457 6458 // Emit device ID if any. 
6459 llvm::Value *DeviceID = nullptr; 6460 if (Device) 6461 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 6462 CGF.Int32Ty, /*isSigned=*/true); 6463 else 6464 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 6465 6466 // Emit the number of elements in the offloading arrays. 6467 auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 6468 6469 llvm::Value *OffloadingArgs[] = { 6470 DeviceID, PointerNum, BasePointersArrayArg, 6471 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 6472 auto &RT = CGF.CGM.getOpenMPRuntime(); 6473 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end), 6474 OffloadingArgs); 6475 }; 6476 6477 // If we need device pointer privatization, we need to emit the body of the 6478 // region with no privatization in the 'else' branch of the conditional. 6479 // Otherwise, we don't have to do anything. 6480 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 6481 PrePostActionTy &) { 6482 if (!Info.CaptureDeviceAddrMap.empty()) { 6483 CodeGen.setAction(NoPrivAction); 6484 CodeGen(CGF); 6485 } 6486 }; 6487 6488 // We don't have to do anything to close the region if the if clause evaluates 6489 // to false. 6490 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 6491 6492 if (IfCond) { 6493 emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 6494 } else { 6495 RegionCodeGenTy RCG(BeginThenGen); 6496 RCG(CGF); 6497 } 6498 6499 // If we don't require privatization of device pointers, we emit the body in 6500 // between the runtime calls. This avoids duplicating the body code. 
6501 if (Info.CaptureDeviceAddrMap.empty()) { 6502 CodeGen.setAction(NoPrivAction); 6503 CodeGen(CGF); 6504 } 6505 6506 if (IfCond) { 6507 emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen); 6508 } else { 6509 RegionCodeGenTy RCG(EndThenGen); 6510 RCG(CGF); 6511 } 6512 } 6513 6514 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 6515 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 6516 const Expr *Device) { 6517 if (!CGF.HaveInsertPoint()) 6518 return; 6519 6520 assert((isa<OMPTargetEnterDataDirective>(D) || 6521 isa<OMPTargetExitDataDirective>(D) || 6522 isa<OMPTargetUpdateDirective>(D)) && 6523 "Expecting either target enter, exit data, or update directives."); 6524 6525 // Generate the code for the opening of the data environment. 6526 auto &&ThenGen = [&D, Device](CodeGenFunction &CGF, PrePostActionTy &) { 6527 // Fill up the arrays with all the mapped variables. 6528 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 6529 MappableExprsHandler::MapValuesArrayTy Pointers; 6530 MappableExprsHandler::MapValuesArrayTy Sizes; 6531 MappableExprsHandler::MapFlagsArrayTy MapTypes; 6532 6533 // Get map clause information. 6534 MappableExprsHandler MEHandler(D, CGF); 6535 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 6536 6537 // Fill up the arrays and create the arguments. 6538 TargetDataInfo Info; 6539 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 6540 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 6541 Info.PointersArray, Info.SizesArray, 6542 Info.MapTypesArray, Info); 6543 6544 // Emit device ID if any. 6545 llvm::Value *DeviceID = nullptr; 6546 if (Device) 6547 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 6548 CGF.Int32Ty, /*isSigned=*/true); 6549 else 6550 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 6551 6552 // Emit the number of elements in the offloading arrays. 
6553 auto *PointerNum = CGF.Builder.getInt32(BasePointers.size()); 6554 6555 llvm::Value *OffloadingArgs[] = { 6556 DeviceID, PointerNum, Info.BasePointersArray, 6557 Info.PointersArray, Info.SizesArray, Info.MapTypesArray}; 6558 6559 auto &RT = CGF.CGM.getOpenMPRuntime(); 6560 // Select the right runtime function call for each expected standalone 6561 // directive. 6562 OpenMPRTLFunction RTLFn; 6563 switch (D.getDirectiveKind()) { 6564 default: 6565 llvm_unreachable("Unexpected standalone target data directive."); 6566 break; 6567 case OMPD_target_enter_data: 6568 RTLFn = OMPRTL__tgt_target_data_begin; 6569 break; 6570 case OMPD_target_exit_data: 6571 RTLFn = OMPRTL__tgt_target_data_end; 6572 break; 6573 case OMPD_target_update: 6574 RTLFn = OMPRTL__tgt_target_data_update; 6575 break; 6576 } 6577 CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs); 6578 }; 6579 6580 // In the event we get an if clause, we don't have to take any action on the 6581 // else side. 6582 auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 6583 6584 if (IfCond) { 6585 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 6586 } else { 6587 RegionCodeGenTy ThenGenRCG(ThenGen); 6588 ThenGenRCG(CGF); 6589 } 6590 } 6591 6592 namespace { 6593 /// Kind of parameter in a function with 'declare simd' directive. 6594 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 6595 /// Attribute set of the parameter. 6596 struct ParamAttrTy { 6597 ParamKindTy Kind = Vector; 6598 llvm::APSInt StrideOrArg; 6599 llvm::APSInt Alignment; 6600 }; 6601 } // namespace 6602 6603 static unsigned evaluateCDTSize(const FunctionDecl *FD, 6604 ArrayRef<ParamAttrTy> ParamAttrs) { 6605 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 6606 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 6607 // of that clause. The VLEN value must be power of 2. 
6608 // In other case the notion of the function`s "characteristic data type" (CDT) 6609 // is used to compute the vector length. 6610 // CDT is defined in the following order: 6611 // a) For non-void function, the CDT is the return type. 6612 // b) If the function has any non-uniform, non-linear parameters, then the 6613 // CDT is the type of the first such parameter. 6614 // c) If the CDT determined by a) or b) above is struct, union, or class 6615 // type which is pass-by-value (except for the type that maps to the 6616 // built-in complex data type), the characteristic data type is int. 6617 // d) If none of the above three cases is applicable, the CDT is int. 6618 // The VLEN is then determined based on the CDT and the size of vector 6619 // register of that ISA for which current vector version is generated. The 6620 // VLEN is computed using the formula below: 6621 // VLEN = sizeof(vector_register) / sizeof(CDT), 6622 // where vector register size specified in section 3.2.1 Registers and the 6623 // Stack Frame of original AMD64 ABI document. 
6624 QualType RetType = FD->getReturnType(); 6625 if (RetType.isNull()) 6626 return 0; 6627 ASTContext &C = FD->getASTContext(); 6628 QualType CDT; 6629 if (!RetType.isNull() && !RetType->isVoidType()) 6630 CDT = RetType; 6631 else { 6632 unsigned Offset = 0; 6633 if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 6634 if (ParamAttrs[Offset].Kind == Vector) 6635 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 6636 ++Offset; 6637 } 6638 if (CDT.isNull()) { 6639 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 6640 if (ParamAttrs[I + Offset].Kind == Vector) { 6641 CDT = FD->getParamDecl(I)->getType(); 6642 break; 6643 } 6644 } 6645 } 6646 } 6647 if (CDT.isNull()) 6648 CDT = C.IntTy; 6649 CDT = CDT->getCanonicalTypeUnqualified(); 6650 if (CDT->isRecordType() || CDT->isUnionType()) 6651 CDT = C.IntTy; 6652 return C.getTypeSize(CDT); 6653 } 6654 6655 static void 6656 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 6657 const llvm::APSInt &VLENVal, 6658 ArrayRef<ParamAttrTy> ParamAttrs, 6659 OMPDeclareSimdDeclAttr::BranchStateTy State) { 6660 struct ISADataTy { 6661 char ISA; 6662 unsigned VecRegSize; 6663 }; 6664 ISADataTy ISAData[] = { 6665 { 6666 'b', 128 6667 }, // SSE 6668 { 6669 'c', 256 6670 }, // AVX 6671 { 6672 'd', 256 6673 }, // AVX2 6674 { 6675 'e', 512 6676 }, // AVX512 6677 }; 6678 llvm::SmallVector<char, 2> Masked; 6679 switch (State) { 6680 case OMPDeclareSimdDeclAttr::BS_Undefined: 6681 Masked.push_back('N'); 6682 Masked.push_back('M'); 6683 break; 6684 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 6685 Masked.push_back('N'); 6686 break; 6687 case OMPDeclareSimdDeclAttr::BS_Inbranch: 6688 Masked.push_back('M'); 6689 break; 6690 } 6691 for (auto Mask : Masked) { 6692 for (auto &Data : ISAData) { 6693 SmallString<256> Buffer; 6694 llvm::raw_svector_ostream Out(Buffer); 6695 Out << "_ZGV" << Data.ISA << Mask; 6696 if (!VLENVal) { 6697 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / 6698 evaluateCDTSize(FD, 
ParamAttrs)); 6699 } else 6700 Out << VLENVal; 6701 for (auto &ParamAttr : ParamAttrs) { 6702 switch (ParamAttr.Kind){ 6703 case LinearWithVarStride: 6704 Out << 's' << ParamAttr.StrideOrArg; 6705 break; 6706 case Linear: 6707 Out << 'l'; 6708 if (!!ParamAttr.StrideOrArg) 6709 Out << ParamAttr.StrideOrArg; 6710 break; 6711 case Uniform: 6712 Out << 'u'; 6713 break; 6714 case Vector: 6715 Out << 'v'; 6716 break; 6717 } 6718 if (!!ParamAttr.Alignment) 6719 Out << 'a' << ParamAttr.Alignment; 6720 } 6721 Out << '_' << Fn->getName(); 6722 Fn->addFnAttr(Out.str()); 6723 } 6724 } 6725 } 6726 6727 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 6728 llvm::Function *Fn) { 6729 ASTContext &C = CGM.getContext(); 6730 FD = FD->getCanonicalDecl(); 6731 // Map params to their positions in function decl. 6732 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 6733 if (isa<CXXMethodDecl>(FD)) 6734 ParamPositions.insert({FD, 0}); 6735 unsigned ParamPos = ParamPositions.size(); 6736 for (auto *P : FD->parameters()) { 6737 ParamPositions.insert({P->getCanonicalDecl(), ParamPos}); 6738 ++ParamPos; 6739 } 6740 for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 6741 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 6742 // Mark uniform parameters. 6743 for (auto *E : Attr->uniforms()) { 6744 E = E->IgnoreParenImpCasts(); 6745 unsigned Pos; 6746 if (isa<CXXThisExpr>(E)) 6747 Pos = ParamPositions[FD]; 6748 else { 6749 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 6750 ->getCanonicalDecl(); 6751 Pos = ParamPositions[PVD]; 6752 } 6753 ParamAttrs[Pos].Kind = Uniform; 6754 } 6755 // Get alignment info. 
6756 auto NI = Attr->alignments_begin(); 6757 for (auto *E : Attr->aligneds()) { 6758 E = E->IgnoreParenImpCasts(); 6759 unsigned Pos; 6760 QualType ParmTy; 6761 if (isa<CXXThisExpr>(E)) { 6762 Pos = ParamPositions[FD]; 6763 ParmTy = E->getType(); 6764 } else { 6765 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 6766 ->getCanonicalDecl(); 6767 Pos = ParamPositions[PVD]; 6768 ParmTy = PVD->getType(); 6769 } 6770 ParamAttrs[Pos].Alignment = 6771 (*NI) ? (*NI)->EvaluateKnownConstInt(C) 6772 : llvm::APSInt::getUnsigned( 6773 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 6774 .getQuantity()); 6775 ++NI; 6776 } 6777 // Mark linear parameters. 6778 auto SI = Attr->steps_begin(); 6779 auto MI = Attr->modifiers_begin(); 6780 for (auto *E : Attr->linears()) { 6781 E = E->IgnoreParenImpCasts(); 6782 unsigned Pos; 6783 if (isa<CXXThisExpr>(E)) 6784 Pos = ParamPositions[FD]; 6785 else { 6786 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 6787 ->getCanonicalDecl(); 6788 Pos = ParamPositions[PVD]; 6789 } 6790 auto &ParamAttr = ParamAttrs[Pos]; 6791 ParamAttr.Kind = Linear; 6792 if (*SI) { 6793 if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C, 6794 Expr::SE_AllowSideEffects)) { 6795 if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 6796 if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 6797 ParamAttr.Kind = LinearWithVarStride; 6798 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 6799 ParamPositions[StridePVD->getCanonicalDecl()]); 6800 } 6801 } 6802 } 6803 } 6804 ++SI; 6805 ++MI; 6806 } 6807 llvm::APSInt VLENVal; 6808 if (const Expr *VLEN = Attr->getSimdlen()) 6809 VLENVal = VLEN->EvaluateKnownConstInt(C); 6810 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 6811 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 6812 CGM.getTriple().getArch() == llvm::Triple::x86_64) 6813 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 6814 } 6815 } 6816 6817 namespace { 6818 /// 
Cleanup action for doacross support. 6819 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 6820 public: 6821 static const int DoacrossFinArgs = 2; 6822 6823 private: 6824 llvm::Value *RTLFn; 6825 llvm::Value *Args[DoacrossFinArgs]; 6826 6827 public: 6828 DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs) 6829 : RTLFn(RTLFn) { 6830 assert(CallArgs.size() == DoacrossFinArgs); 6831 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 6832 } 6833 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 6834 if (!CGF.HaveInsertPoint()) 6835 return; 6836 CGF.EmitRuntimeCall(RTLFn, Args); 6837 } 6838 }; 6839 } // namespace 6840 6841 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 6842 const OMPLoopDirective &D) { 6843 if (!CGF.HaveInsertPoint()) 6844 return; 6845 6846 ASTContext &C = CGM.getContext(); 6847 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 6848 RecordDecl *RD; 6849 if (KmpDimTy.isNull()) { 6850 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 6851 // kmp_int64 lo; // lower 6852 // kmp_int64 up; // upper 6853 // kmp_int64 st; // stride 6854 // }; 6855 RD = C.buildImplicitRecord("kmp_dim"); 6856 RD->startDefinition(); 6857 addFieldToRecordDecl(C, RD, Int64Ty); 6858 addFieldToRecordDecl(C, RD, Int64Ty); 6859 addFieldToRecordDecl(C, RD, Int64Ty); 6860 RD->completeDefinition(); 6861 KmpDimTy = C.getRecordType(RD); 6862 } else 6863 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 6864 6865 Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims"); 6866 CGF.EmitNullInitialization(DimsAddr, KmpDimTy); 6867 enum { LowerFD = 0, UpperFD, StrideFD }; 6868 // Fill dims with data. 
6869 LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy); 6870 // dims.upper = num_iterations; 6871 LValue UpperLVal = 6872 CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD)); 6873 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 6874 CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(), 6875 Int64Ty, D.getNumIterations()->getExprLoc()); 6876 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 6877 // dims.stride = 1; 6878 LValue StrideLVal = 6879 CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD)); 6880 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 6881 StrideLVal); 6882 6883 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 6884 // kmp_int32 num_dims, struct kmp_dim * dims); 6885 llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()), 6886 getThreadID(CGF, D.getLocStart()), 6887 llvm::ConstantInt::getSigned(CGM.Int32Ty, 1), 6888 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6889 DimsAddr.getPointer(), CGM.VoidPtrTy)}; 6890 6891 llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init); 6892 CGF.EmitRuntimeCall(RTLFn, Args); 6893 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 6894 emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())}; 6895 llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 6896 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 6897 llvm::makeArrayRef(FiniArgs)); 6898 } 6899 6900 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 6901 const OMPDependClause *C) { 6902 QualType Int64Ty = 6903 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 6904 const Expr *CounterVal = C->getCounterValue(); 6905 assert(CounterVal); 6906 llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal), 6907 CounterVal->getType(), Int64Ty, 6908 CounterVal->getExprLoc()); 6909 Address CntAddr = 
CGF.CreateMemTemp(Int64Ty, ".cnt.addr"); 6910 CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty); 6911 llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()), 6912 getThreadID(CGF, C->getLocStart()), 6913 CntAddr.getPointer()}; 6914 llvm::Value *RTLFn; 6915 if (C->getDependencyKind() == OMPC_DEPEND_source) 6916 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 6917 else { 6918 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 6919 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 6920 } 6921 CGF.EmitRuntimeCall(RTLFn, Args); 6922 } 6923 6924