//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;

namespace {
/// \brief Base class for handling code generation inside OpenMP regions.
///
/// Stores the region kind, the code generation callback for the region body,
/// the OpenMP directive kind and whether the region has a cancel construct.
/// Derived classes supply the thread id variable.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// \brief Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// \brief Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// \brief Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// \brief Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// \brief Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// \brief Construct region info for a region backed by the captured
  /// statement \a CS.
  /// \param CS Captured statement of the region.
  /// \param RegionKind Kind of the region, later queried by the classof()
  /// implementations of derived classes.
  /// \param CodeGen Callback that emits the region body (see EmitBody()).
  /// \param Kind OpenMP directive kind reported by getDirectiveKind().
  /// \param HasCancel Value reported by hasCancel().
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// \brief Construct region info that is not backed by a captured statement.
  /// Used by CGOpenMPInlinedRegionInfo, which has no captured statement of
  /// its own.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// \brief Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// \brief Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// \brief Hook for emitting a switching point of an untied task. The base
  /// implementation emits nothing.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// \brief Get the kind of this region.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// \brief Get the OpenMP directive kind this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// \brief Get the cancel flag passed at construction.
  bool hasCancel() const { return HasCancel; }

  /// \brief LLVM-style RTTI support: every captured statement info of kind
  /// CR_OpenMP is treated as a CGOpenMPRegionInfo.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of the region; discriminator used by derived classof() checks.
  CGOpenMPRegionKind RegionKind;
  /// Callback invoked by EmitBody() to emit the region body.
  RegionCodeGenTy CodeGen;
  /// OpenMP directive kind of the region.
  OpenMPDirectiveKind Kind;
  /// Cancel flag supplied by the creator of the region info.
  bool HasCancel;
};

/// \brief API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \brief Construct info for a region of kind ParallelOutlinedRegion.
  /// \param CS Captured statement of the region.
  /// \param ThreadIDVar Variable holding the global thread id; must not be
  /// null (asserted).
  /// \param CodeGen Callback that emits the region body.
  /// \param Kind OpenMP directive kind of the region.
  /// \param HasCancel Cancel flag of the region.
  /// \param HelperName Name returned by getHelperName(). It is stored as a
  /// StringRef, so the referenced characters are not copied here.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// \brief LLVM-style RTTI support: matches OpenMP region infos whose region
  /// kind is ParallelOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// \brief A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the capture helper, as passed to the constructor.
  StringRef HelperName;
};

/// \brief API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \brief Pre/post action that emits the switching points of an untied
  /// task. For tied tasks (Untied == false) it emits nothing.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True if the task is untied (negation of the 'Tied' constructor arg).
    bool Untied;
    /// Variable of pointer type through which the part id is loaded/stored.
    const VarDecl *PartIDVar;
    /// Callback invoked at each switching point after the part id is stored.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; created in Enter() for untied tasks only.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    /// \brief Emit the dispatch switch over the part id at region entry and
    /// the first switching point.
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation());
        // The default destination of the switch branches to the return block.
        auto *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Case 0 continues in a fresh block right after the switch.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// \brief Emit one switching point: store the index of the next part
    /// into the part id, run UntiedCodeGen, branch to the return block, and
    /// register a new switch case that resumes at the code that follows.
    /// Dereferences UntiedSwitch, which is only set by Enter().
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The current number of cases is the value of the case added below.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// \brief Number of cases in the untied switch.
    /// NOTE(review): dereferences UntiedSwitch unconditionally; it is null
    /// unless Enter() ran for an untied task - confirm callers only use this
    /// for untied tasks.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  /// \brief Construct info for a region of kind TaskOutlinedRegion.
  /// \param CS Captured statement of the region.
  /// \param ThreadIDVar Variable holding the global thread id; must not be
  /// null (asserted).
  /// \param CodeGen Callback that emits the region body.
  /// \param Kind OpenMP directive kind of the region.
  /// \param HasCancel Cancel flag of the region.
  /// \param Action Untied-task action. Only a reference is stored, so the
  /// object must outlive this region info.
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// \brief Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// \brief Forward the switching point emission to the untied-task action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// \brief LLVM-style RTTI support: matches OpenMP region infos whose region
  /// kind is TaskOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// \brief A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// \brief API for inlined captured statement code generation in OpenMP
/// constructs.
///
/// Most queries are forwarded to the enclosing OpenMP region info, if any.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \brief Construct info for a region of kind InlinedRegion.
  /// \param OldCSI Captured statement info that was active before this
  /// region; may be null. If it is an OpenMP region info it also becomes the
  /// outer region that queries are forwarded to.
  /// \param CodeGen Callback that emits the region body.
  /// \param Kind OpenMP directive kind of the region.
  /// \param HasCancel Cancel flag of the region.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  /// \brief Retrieve the value of the context parameter from the outer
  /// OpenMP region. Unreachable if there is no outer OpenMP region.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// \brief Set the value of the context parameter on the outer OpenMP
  /// region. Unreachable if there is no outer OpenMP region.
  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// \brief Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  /// \brief Get the field decl of the captured 'this' from the outer OpenMP
  /// region, or null if there is no outer OpenMP region.
  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override {
    // Note: this consults the previous captured statement info (OldCSI),
    // which need not be an OpenMP region; the local name shadows the member.
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  /// \brief Forward the untied switching point to the outer OpenMP region, if
  /// there is one.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// \brief Get the captured statement info that was passed at construction.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  /// \brief LLVM-style RTTI support: matches OpenMP region infos whose region
  /// kind is InlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// \brief CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI viewed as an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// \brief API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \brief Construct info for a region of kind TargetRegion. The directive
  /// kind is always OMPD_target and the cancel flag is always false.
  /// \param CS Captured statement of the region.
  /// \param CodeGen Callback that emits the region body.
  /// \param HelperName Name returned by getHelperName(); stored as a
  /// StringRef, so the referenced characters are not copied here.
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// \brief This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// \brief LLVM-style RTTI support: matches OpenMP region infos whose region
  /// kind is TargetRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Name of the capture helper, as passed to the constructor.
  StringRef HelperName;
};

/// Placeholder region codegen callback for CGOpenMPInnerExprInfo; it must
/// never be invoked.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// \brief API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  /// \brief Create the info on top of CGF's current captured statement info
  /// and privatize the non-local variables captured by \a CS.
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (auto &C : CS.captures()) {
      // Only variable captures (by reference or by copy) are of interest.
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Local variables and parameters need no privatization here.
      if (VD->isLocalVarDeclOrParm())
        continue;

      // Map the variable to the address obtained by emitting a plain
      // reference to it.
      DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      SourceLocation());
      PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
        return CGF.EmitLValue(&DRE).getAddress();
      });
    }
    (void)PrivScope.Privatize();
  }

  /// \brief Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// \brief Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  /// \brief LLVM-style RTTI support: never matches, so isa/dyn_cast to this
  /// type always fail.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// \brief RAII for emitting code of OpenMP constructs.
///
/// On construction installs a new CGOpenMPInlinedRegionInfo as the current
/// captured statement info and clears CGF's lambda capture state; on
/// destruction restores all of it.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Holds CGF's lambda capture fields while the region is active.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  /// Holds CGF's lambda 'this' capture field while the region is active.
  FieldDecl *LambdaThisCaptureField = nullptr;

public:
  /// \brief Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    // Swap CGF's map with our (empty) one; the destructor swaps it back.
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
  }
};

/// \brief Values for bit flags used in the ident_t to describe the fields.
/// All enumeration elements are named and described in accordance with the
/// code from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
enum OpenMPLocationFlags {
  /// \brief Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// \brief Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// \brief Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// \brief Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// \brief Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// \brief Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// \brief Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// \brief Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
};

/// \brief Describes ident structure that describes a source location.
/// All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// \brief might be used in Fortran
  IdentField_Reserved_1,
  /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// \brief Not really used in Fortran any more
  IdentField_Reserved_2,
  /// \brief Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// \brief String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// \brief dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// Identifiers of the runtime library entry points referenced by this file.
/// Each enumerator is preceded by the prototype of the function it names.
enum OpenMPRTLFunction {
  /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// \brief Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,

  //
  // Offloading related calls
  //
  // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
  // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_update,
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
///
/// When the cleanup is emitted it calls Exit() on the stored action.
class CleanupTy final : public EHScopeStack::Cleanup {
  /// Action whose Exit() is called; not owned by the cleanup.
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // Nothing to emit if there is no current insertion point.
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

/// Run the region codegen callback inside its own cleanups scope. If a
/// pre/post action is attached, a cleanup that calls its Exit() is pushed
/// first; otherwise a default-constructed action is passed to the callback.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Default implementation: the thread id variable has pointer type, so the
/// lvalue is obtained by loading that pointer from the variable's address.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

/// Emit the region body by invoking the stored CodeGen callback with a
/// terminate scope pushed on the EH stack for the duration of the body.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

/// Task regions form the lvalue directly from the address of the thread id
/// variable, without the pointer load done by the base implementation.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Create the LLVM types used for runtime calls (ident_t and the critical
/// name array) and load the offload info metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OffloadEntriesInfoManager(CGM) {
  // The field order must match IdentFieldIndex.
  IdentTy = llvm::StructType::create(
      "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
      CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
      CGM.Int8PtrTy /* psource */);
  // kmp_critical_name is an array of 8 i32 values.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}

/// Drop all entries of InternalVars.
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
}

/// Emit the internal helper function for a user-defined reduction.
/// \param Ty Type of the reduction; both parameters are restrict-qualified
/// pointers to it.
/// \param CombinerInitializer Expression emitted as the function body.
/// \param In Variable mapped to the pointee of the second parameter.
/// \param Out Variable mapped to the pointee of the first parameter.
/// \param IsCombiner Selects the function name: ".omp_combiner." if true,
/// ".omp_initializer." otherwise.
/// \return The created function (internal linkage, always-inline).
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  // (the 'out' parameter is pushed first, see Args below)
  auto &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  auto *Fn = llvm::Function::Create(
      FnTy, llvm::GlobalValue::InternalLinkage,
      IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
  // Replace a possible 'noinline' attribute with 'alwaysinline'.
  Fn->removeFnAttr(llvm::Attribute::NoInline);
  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit the combiner and, if the declaration has one, the initializer of the
/// user-defined reduction \a D and record them in UDRMap. Does nothing if
/// \a D is already recorded.
/// \param CGF If non-null, \a D is also appended to the list kept in
/// FunctionUDRMap for CGF's current function.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  auto &C = CGM.getContext();
  // In/Out are declared outside this function (presumably cached members);
  // the identifiers are looked up only while either is still null.
  if (!In || !Out) {
    In = &C.Idents.get("omp_in");
    Out = &C.Idents.get("omp_out");
  }
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
      cast<VarDecl>(D->lookup(Out).front()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (auto *Init = D->getInitializer()) {
    // Same lazy lookup for the identifiers used by the initializer.
    if (!Priv || !Orig) {
      Priv = &C.Idents.get("omp_priv");
      Orig = &C.Idents.get("omp_orig");
    }
    // For the initializer, omp_orig is passed as 'In' and omp_priv as 'Out'.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()),
        cast<VarDecl>(D->lookup(Priv).front()),
        /*IsCombiner=*/false);
  }
  UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Get the (combiner, initializer) pair recorded for \a D, emitting the
/// functions first if \a D is not in UDRMap yet. The initializer is null if
/// the declaration has none.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

// Layout information for ident_t.
/// Alignment of ident_t: the pointer alignment.
static CharUnits getIdentAlign(CodeGenModule &CGM) {
  return CGM.getPointerAlign();
}
/// Size of ident_t: 16 bytes (four i32 fields) plus the size of a pointer.
static CharUnits getIdentSize(CodeGenModule &CGM) {
  // NOTE(review): the assert tests 4 * pointer size, while the size below
  // uses the 16 bytes of the four i32 fields - confirm which was intended.
  assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
  return CharUnits::fromQuantity(16) + CGM.getPointerSize();
}
/// Byte offset of \a Field within ident_t: four bytes per preceding field.
static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
  // All the fields except the last are i32, so this works beautifully.
  return unsigned(Field) * CharUnits::fromQuantity(4);
}
/// Create a struct GEP addressing \a Field of the ident_t object at \a Addr.
/// \param Name Optional name passed to CreateStructGEP.
static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
                                   IdentFieldIndex Field,
                                   const llvm::Twine &Name = "") {
  auto Offset = getOffsetOfIdentField(Field);
  return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
}

/// Common implementation for outlining 'parallel' and 'teams' regions.
/// \param D Directive the region belongs to; only inspected to determine
/// whether the region has a cancel construct.
/// \param CS Captured statement to outline.
/// \param ThreadIDVar Thread id variable; must have pointer type (asserted).
/// \param InnermostKind Directive kind stored in the region info.
/// \param OutlinedHelperName Helper name stored in the region info.
/// \param CodeGen Callback that emits the region body.
/// \return The result of GenerateOpenMPCapturedStmtFunction() for \a CS.
static llvm::Value *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  // NOTE(review): the meaning of the 'true' argument is not visible in this
  // file - confirm against the CodeGenFunction constructor.
  CodeGenFunction CGF(CGM, true);
  // Only these three directive kinds are queried for a cancel construct; for
  // any other directive HasCancel stays false.
  bool HasCancel = false;
  if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}

/// Outline the OMPD_parallel captured statement of \a D, using the name
/// returned by getOutlinedHelperName().
llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return
emitParallelOrTeamsOutlinedFunction( 878 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 879 } 880 881 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( 882 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 883 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 884 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 885 bool Tied, unsigned &NumberOfParts) { 886 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 887 PrePostActionTy &) { 888 auto *ThreadID = getThreadID(CGF, D.getLocStart()); 889 auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart()); 890 llvm::Value *TaskArgs[] = { 891 UpLoc, ThreadID, 892 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 893 TaskTVar->getType()->castAs<PointerType>()) 894 .getPointer()}; 895 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 896 }; 897 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 898 UntiedCodeGen); 899 CodeGen.setAction(Action); 900 assert(!ThreadIDVar->getType()->isPointerType() && 901 "thread id variable must be of type kmp_int32 for tasks"); 902 auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 903 auto *TD = dyn_cast<OMPTaskDirective>(&D); 904 CodeGenFunction CGF(CGM, true); 905 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 906 InnermostKind, 907 TD ? TD->hasCancel() : false, Action); 908 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 909 auto *Res = CGF.GenerateCapturedStmtFunction(*CS); 910 if (!Tied) 911 NumberOfParts = Action.getNumberOfParts(); 912 return Res; 913 } 914 915 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 916 CharUnits Align = getIdentAlign(CGM); 917 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); 918 if (!Entry) { 919 if (!DefaultOpenMPPSource) { 920 // Initialize default location for psource field of ident_t structure of 921 // all ident_t objects. 
Format is ";file;function;line;column;;". 922 // Taken from 923 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c 924 DefaultOpenMPPSource = 925 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 926 DefaultOpenMPPSource = 927 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 928 } 929 930 ConstantInitBuilder builder(CGM); 931 auto fields = builder.beginStruct(IdentTy); 932 fields.addInt(CGM.Int32Ty, 0); 933 fields.addInt(CGM.Int32Ty, Flags); 934 fields.addInt(CGM.Int32Ty, 0); 935 fields.addInt(CGM.Int32Ty, 0); 936 fields.add(DefaultOpenMPPSource); 937 auto DefaultOpenMPLocation = 938 fields.finishAndCreateGlobal("", Align, /*isConstant*/ true, 939 llvm::GlobalValue::PrivateLinkage); 940 DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 941 942 OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation; 943 } 944 return Address(Entry, Align); 945 } 946 947 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 948 SourceLocation Loc, 949 unsigned Flags) { 950 Flags |= OMP_IDENT_KMPC; 951 // If no debug info is generated - return global default location. 952 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 953 Loc.isInvalid()) 954 return getOrCreateDefaultLocation(Flags).getPointer(); 955 956 assert(CGF.CurFn && "No function in current CodeGenFunction."); 957 958 Address LocValue = Address::invalid(); 959 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 960 if (I != OpenMPLocThreadIDMap.end()) 961 LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM)); 962 963 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 964 // GetOpenMPThreadID was called before this routine. 
965 if (!LocValue.isValid()) { 966 // Generate "ident_t .kmpc_loc.addr;" 967 Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM), 968 ".kmpc_loc.addr"); 969 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 970 Elem.second.DebugLoc = AI.getPointer(); 971 LocValue = AI; 972 973 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 974 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 975 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 976 CGM.getSize(getIdentSize(CGF.CGM))); 977 } 978 979 // char **psource = &.kmpc_loc_<flags>.addr.psource; 980 Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource); 981 982 auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 983 if (OMPDebugLoc == nullptr) { 984 SmallString<128> Buffer2; 985 llvm::raw_svector_ostream OS2(Buffer2); 986 // Build debug location 987 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 988 OS2 << ";" << PLoc.getFilename() << ";"; 989 if (const FunctionDecl *FD = 990 dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) { 991 OS2 << FD->getQualifiedNameAsString(); 992 } 993 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 994 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 995 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 996 } 997 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 998 CGF.Builder.CreateStore(OMPDebugLoc, PSource); 999 1000 // Our callers always pass this to a runtime function, so for 1001 // convenience, go ahead and return a naked pointer. 1002 return LocValue.getPointer(); 1003 } 1004 1005 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1006 SourceLocation Loc) { 1007 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1008 1009 llvm::Value *ThreadID = nullptr; 1010 // Check whether we've already cached a load of the thread id in this 1011 // function. 
1012 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1013 if (I != OpenMPLocThreadIDMap.end()) { 1014 ThreadID = I->second.ThreadID; 1015 if (ThreadID != nullptr) 1016 return ThreadID; 1017 } 1018 if (auto *OMPRegionInfo = 1019 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1020 if (OMPRegionInfo->getThreadIDVariable()) { 1021 // Check if this an outlined function with thread id passed as argument. 1022 auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1023 ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); 1024 // If value loaded in entry block, cache it and use it everywhere in 1025 // function. 1026 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 1027 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1028 Elem.second.ThreadID = ThreadID; 1029 } 1030 return ThreadID; 1031 } 1032 } 1033 1034 // This is not an outlined function region - need to call __kmpc_int32 1035 // kmpc_global_thread_num(ident_t *loc). 1036 // Generate thread id value and cache this value for use across the 1037 // function. 
1038 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1039 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 1040 ThreadID = 1041 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1042 emitUpdateLocation(CGF, Loc)); 1043 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1044 Elem.second.ThreadID = ThreadID; 1045 return ThreadID; 1046 } 1047 1048 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1049 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1050 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 1051 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1052 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1053 for(auto *D : FunctionUDRMap[CGF.CurFn]) { 1054 UDRMap.erase(D); 1055 } 1056 FunctionUDRMap.erase(CGF.CurFn); 1057 } 1058 } 1059 1060 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1061 if (!IdentTy) { 1062 } 1063 return llvm::PointerType::getUnqual(IdentTy); 1064 } 1065 1066 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1067 if (!Kmpc_MicroTy) { 1068 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1069 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1070 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1071 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1072 } 1073 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1074 } 1075 1076 llvm::Constant * 1077 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1078 llvm::Constant *RTLFn = nullptr; 1079 switch (static_cast<OpenMPRTLFunction>(Function)) { 1080 case OMPRTL__kmpc_fork_call: { 1081 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1082 // microtask, ...); 1083 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1084 getKmpc_MicroPointerTy()}; 1085 llvm::FunctionType *FnTy = 1086 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1087 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1088 break; 1089 } 1090 case OMPRTL__kmpc_global_thread_num: { 1091 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1092 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1093 llvm::FunctionType *FnTy = 1094 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1095 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1096 break; 1097 } 1098 case OMPRTL__kmpc_threadprivate_cached: { 1099 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1100 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1101 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1102 CGM.VoidPtrTy, CGM.SizeTy, 1103 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1104 llvm::FunctionType *FnTy = 1105 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1106 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1107 break; 1108 } 1109 case OMPRTL__kmpc_critical: { 1110 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1111 // kmp_critical_name *crit); 1112 llvm::Type *TypeParams[] = { 1113 getIdentTyPointerTy(), 
CGM.Int32Ty, 1114 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1115 llvm::FunctionType *FnTy = 1116 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1117 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1118 break; 1119 } 1120 case OMPRTL__kmpc_critical_with_hint: { 1121 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1122 // kmp_critical_name *crit, uintptr_t hint); 1123 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1124 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1125 CGM.IntPtrTy}; 1126 llvm::FunctionType *FnTy = 1127 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1128 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); 1129 break; 1130 } 1131 case OMPRTL__kmpc_threadprivate_register: { 1132 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1133 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1134 // typedef void *(*kmpc_ctor)(void *); 1135 auto KmpcCtorTy = 1136 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1137 /*isVarArg*/ false)->getPointerTo(); 1138 // typedef void *(*kmpc_cctor)(void *, void *); 1139 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1140 auto KmpcCopyCtorTy = 1141 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1142 /*isVarArg*/ false)->getPointerTo(); 1143 // typedef void (*kmpc_dtor)(void *); 1144 auto KmpcDtorTy = 1145 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1146 ->getPointerTo(); 1147 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1148 KmpcCopyCtorTy, KmpcDtorTy}; 1149 auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1150 /*isVarArg*/ false); 1151 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1152 break; 1153 } 1154 case OMPRTL__kmpc_end_critical: { 1155 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1156 // kmp_critical_name *crit); 
1157 llvm::Type *TypeParams[] = { 1158 getIdentTyPointerTy(), CGM.Int32Ty, 1159 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1160 llvm::FunctionType *FnTy = 1161 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1162 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1163 break; 1164 } 1165 case OMPRTL__kmpc_cancel_barrier: { 1166 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1167 // global_tid); 1168 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1169 llvm::FunctionType *FnTy = 1170 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1171 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1172 break; 1173 } 1174 case OMPRTL__kmpc_barrier: { 1175 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1176 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1177 llvm::FunctionType *FnTy = 1178 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1179 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1180 break; 1181 } 1182 case OMPRTL__kmpc_for_static_fini: { 1183 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1184 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1185 llvm::FunctionType *FnTy = 1186 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1187 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1188 break; 1189 } 1190 case OMPRTL__kmpc_push_num_threads: { 1191 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1192 // kmp_int32 num_threads) 1193 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1194 CGM.Int32Ty}; 1195 llvm::FunctionType *FnTy = 1196 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1197 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1198 break; 1199 } 1200 case OMPRTL__kmpc_serialized_parallel: { 1201 // Build void 
__kmpc_serialized_parallel(ident_t *loc, kmp_int32 1202 // global_tid); 1203 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1204 llvm::FunctionType *FnTy = 1205 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1206 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1207 break; 1208 } 1209 case OMPRTL__kmpc_end_serialized_parallel: { 1210 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1211 // global_tid); 1212 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1213 llvm::FunctionType *FnTy = 1214 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1215 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1216 break; 1217 } 1218 case OMPRTL__kmpc_flush: { 1219 // Build void __kmpc_flush(ident_t *loc); 1220 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1221 llvm::FunctionType *FnTy = 1222 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1223 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1224 break; 1225 } 1226 case OMPRTL__kmpc_master: { 1227 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1228 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1229 llvm::FunctionType *FnTy = 1230 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1231 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1232 break; 1233 } 1234 case OMPRTL__kmpc_end_master: { 1235 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1236 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1237 llvm::FunctionType *FnTy = 1238 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1239 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1240 break; 1241 } 1242 case OMPRTL__kmpc_omp_taskyield: { 1243 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1244 // int end_part); 1245 
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1246 llvm::FunctionType *FnTy = 1247 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1248 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1249 break; 1250 } 1251 case OMPRTL__kmpc_single: { 1252 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1253 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1254 llvm::FunctionType *FnTy = 1255 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1256 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1257 break; 1258 } 1259 case OMPRTL__kmpc_end_single: { 1260 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1261 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1262 llvm::FunctionType *FnTy = 1263 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1264 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1265 break; 1266 } 1267 case OMPRTL__kmpc_omp_task_alloc: { 1268 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1269 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1270 // kmp_routine_entry_t *task_entry); 1271 assert(KmpRoutineEntryPtrTy != nullptr && 1272 "Type kmp_routine_entry_t must be created."); 1273 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1274 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1275 // Return void * and then cast to particular kmp_task_t type. 
1276 llvm::FunctionType *FnTy = 1277 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1278 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1279 break; 1280 } 1281 case OMPRTL__kmpc_omp_task: { 1282 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1283 // *new_task); 1284 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1285 CGM.VoidPtrTy}; 1286 llvm::FunctionType *FnTy = 1287 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1288 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 1289 break; 1290 } 1291 case OMPRTL__kmpc_copyprivate: { 1292 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 1293 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 1294 // kmp_int32 didit); 1295 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1296 auto *CpyFnTy = 1297 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 1298 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 1299 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 1300 CGM.Int32Ty}; 1301 llvm::FunctionType *FnTy = 1302 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1303 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 1304 break; 1305 } 1306 case OMPRTL__kmpc_reduce: { 1307 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 1308 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 1309 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 1310 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1311 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1312 /*isVarArg=*/false); 1313 llvm::Type *TypeParams[] = { 1314 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1315 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1316 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1317 
llvm::FunctionType *FnTy = 1318 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1319 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 1320 break; 1321 } 1322 case OMPRTL__kmpc_reduce_nowait: { 1323 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 1324 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 1325 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 1326 // *lck); 1327 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1328 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1329 /*isVarArg=*/false); 1330 llvm::Type *TypeParams[] = { 1331 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1332 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1333 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1334 llvm::FunctionType *FnTy = 1335 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1336 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 1337 break; 1338 } 1339 case OMPRTL__kmpc_end_reduce: { 1340 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 1341 // kmp_critical_name *lck); 1342 llvm::Type *TypeParams[] = { 1343 getIdentTyPointerTy(), CGM.Int32Ty, 1344 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1345 llvm::FunctionType *FnTy = 1346 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1347 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 1348 break; 1349 } 1350 case OMPRTL__kmpc_end_reduce_nowait: { 1351 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 1352 // kmp_critical_name *lck); 1353 llvm::Type *TypeParams[] = { 1354 getIdentTyPointerTy(), CGM.Int32Ty, 1355 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1356 llvm::FunctionType *FnTy = 1357 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1358 RTLFn = 1359 CGM.CreateRuntimeFunction(FnTy, 
/*Name=*/"__kmpc_end_reduce_nowait"); 1360 break; 1361 } 1362 case OMPRTL__kmpc_omp_task_begin_if0: { 1363 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1364 // *new_task); 1365 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1366 CGM.VoidPtrTy}; 1367 llvm::FunctionType *FnTy = 1368 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1369 RTLFn = 1370 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 1371 break; 1372 } 1373 case OMPRTL__kmpc_omp_task_complete_if0: { 1374 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1375 // *new_task); 1376 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1377 CGM.VoidPtrTy}; 1378 llvm::FunctionType *FnTy = 1379 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1380 RTLFn = CGM.CreateRuntimeFunction(FnTy, 1381 /*Name=*/"__kmpc_omp_task_complete_if0"); 1382 break; 1383 } 1384 case OMPRTL__kmpc_ordered: { 1385 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 1386 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1387 llvm::FunctionType *FnTy = 1388 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1389 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 1390 break; 1391 } 1392 case OMPRTL__kmpc_end_ordered: { 1393 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 1394 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1395 llvm::FunctionType *FnTy = 1396 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1397 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 1398 break; 1399 } 1400 case OMPRTL__kmpc_omp_taskwait: { 1401 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 1402 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1403 llvm::FunctionType *FnTy = 1404 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1405 RTLFn = 
CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 1406 break; 1407 } 1408 case OMPRTL__kmpc_taskgroup: { 1409 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 1410 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1411 llvm::FunctionType *FnTy = 1412 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1413 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 1414 break; 1415 } 1416 case OMPRTL__kmpc_end_taskgroup: { 1417 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 1418 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1419 llvm::FunctionType *FnTy = 1420 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1421 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 1422 break; 1423 } 1424 case OMPRTL__kmpc_push_proc_bind: { 1425 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 1426 // int proc_bind) 1427 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1428 llvm::FunctionType *FnTy = 1429 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1430 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 1431 break; 1432 } 1433 case OMPRTL__kmpc_omp_task_with_deps: { 1434 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 1435 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 1436 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 1437 llvm::Type *TypeParams[] = { 1438 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 1439 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 1440 llvm::FunctionType *FnTy = 1441 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1442 RTLFn = 1443 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 1444 break; 1445 } 1446 case OMPRTL__kmpc_omp_wait_deps: { 1447 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 1448 // 
kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 1449 // kmp_depend_info_t *noalias_dep_list); 1450 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1451 CGM.Int32Ty, CGM.VoidPtrTy, 1452 CGM.Int32Ty, CGM.VoidPtrTy}; 1453 llvm::FunctionType *FnTy = 1454 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1455 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 1456 break; 1457 } 1458 case OMPRTL__kmpc_cancellationpoint: { 1459 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 1460 // global_tid, kmp_int32 cncl_kind) 1461 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1462 llvm::FunctionType *FnTy = 1463 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1464 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 1465 break; 1466 } 1467 case OMPRTL__kmpc_cancel: { 1468 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 1469 // kmp_int32 cncl_kind) 1470 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1471 llvm::FunctionType *FnTy = 1472 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1473 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 1474 break; 1475 } 1476 case OMPRTL__kmpc_push_num_teams: { 1477 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 1478 // kmp_int32 num_teams, kmp_int32 num_threads) 1479 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1480 CGM.Int32Ty}; 1481 llvm::FunctionType *FnTy = 1482 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1483 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 1484 break; 1485 } 1486 case OMPRTL__kmpc_fork_teams: { 1487 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 1488 // microtask, ...); 1489 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1490 
getKmpc_MicroPointerTy()}; 1491 llvm::FunctionType *FnTy = 1492 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1493 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 1494 break; 1495 } 1496 case OMPRTL__kmpc_taskloop: { 1497 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 1498 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 1499 // sched, kmp_uint64 grainsize, void *task_dup); 1500 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 1501 CGM.IntTy, 1502 CGM.VoidPtrTy, 1503 CGM.IntTy, 1504 CGM.Int64Ty->getPointerTo(), 1505 CGM.Int64Ty->getPointerTo(), 1506 CGM.Int64Ty, 1507 CGM.IntTy, 1508 CGM.IntTy, 1509 CGM.Int64Ty, 1510 CGM.VoidPtrTy}; 1511 llvm::FunctionType *FnTy = 1512 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1513 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 1514 break; 1515 } 1516 case OMPRTL__kmpc_doacross_init: { 1517 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 1518 // num_dims, struct kmp_dim *dims); 1519 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 1520 CGM.Int32Ty, 1521 CGM.Int32Ty, 1522 CGM.VoidPtrTy}; 1523 llvm::FunctionType *FnTy = 1524 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1525 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 1526 break; 1527 } 1528 case OMPRTL__kmpc_doacross_fini: { 1529 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 1530 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1531 llvm::FunctionType *FnTy = 1532 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1533 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 1534 break; 1535 } 1536 case OMPRTL__kmpc_doacross_post: { 1537 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 1538 // *vec); 1539 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 
1540 CGM.Int64Ty->getPointerTo()}; 1541 llvm::FunctionType *FnTy = 1542 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1543 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 1544 break; 1545 } 1546 case OMPRTL__kmpc_doacross_wait: { 1547 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 1548 // *vec); 1549 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1550 CGM.Int64Ty->getPointerTo()}; 1551 llvm::FunctionType *FnTy = 1552 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1553 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 1554 break; 1555 } 1556 case OMPRTL__tgt_target: { 1557 // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t 1558 // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t 1559 // *arg_types); 1560 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1561 CGM.VoidPtrTy, 1562 CGM.Int32Ty, 1563 CGM.VoidPtrPtrTy, 1564 CGM.VoidPtrPtrTy, 1565 CGM.SizeTy->getPointerTo(), 1566 CGM.Int32Ty->getPointerTo()}; 1567 llvm::FunctionType *FnTy = 1568 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1569 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 1570 break; 1571 } 1572 case OMPRTL__tgt_target_teams: { 1573 // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, 1574 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 1575 // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); 1576 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1577 CGM.VoidPtrTy, 1578 CGM.Int32Ty, 1579 CGM.VoidPtrPtrTy, 1580 CGM.VoidPtrPtrTy, 1581 CGM.SizeTy->getPointerTo(), 1582 CGM.Int32Ty->getPointerTo(), 1583 CGM.Int32Ty, 1584 CGM.Int32Ty}; 1585 llvm::FunctionType *FnTy = 1586 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1587 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 1588 break; 1589 } 1590 case OMPRTL__tgt_register_lib: { 
1591 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 1592 QualType ParamTy = 1593 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 1594 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 1595 llvm::FunctionType *FnTy = 1596 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1597 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 1598 break; 1599 } 1600 case OMPRTL__tgt_unregister_lib: { 1601 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 1602 QualType ParamTy = 1603 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 1604 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 1605 llvm::FunctionType *FnTy = 1606 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1607 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 1608 break; 1609 } 1610 case OMPRTL__tgt_target_data_begin: { 1611 // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, 1612 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 1613 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1614 CGM.Int32Ty, 1615 CGM.VoidPtrPtrTy, 1616 CGM.VoidPtrPtrTy, 1617 CGM.SizeTy->getPointerTo(), 1618 CGM.Int32Ty->getPointerTo()}; 1619 llvm::FunctionType *FnTy = 1620 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1621 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 1622 break; 1623 } 1624 case OMPRTL__tgt_target_data_end: { 1625 // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num, 1626 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 1627 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1628 CGM.Int32Ty, 1629 CGM.VoidPtrPtrTy, 1630 CGM.VoidPtrPtrTy, 1631 CGM.SizeTy->getPointerTo(), 1632 CGM.Int32Ty->getPointerTo()}; 1633 llvm::FunctionType *FnTy = 1634 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1635 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__tgt_target_data_end"); 1636 break; 1637 } 1638 case OMPRTL__tgt_target_data_update: { 1639 // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num, 1640 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 1641 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1642 CGM.Int32Ty, 1643 CGM.VoidPtrPtrTy, 1644 CGM.VoidPtrPtrTy, 1645 CGM.SizeTy->getPointerTo(), 1646 CGM.Int32Ty->getPointerTo()}; 1647 llvm::FunctionType *FnTy = 1648 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1649 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 1650 break; 1651 } 1652 } 1653 assert(RTLFn && "Unable to find OpenMP runtime function"); 1654 return RTLFn; 1655 } 1656 1657 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, 1658 bool IVSigned) { 1659 assert((IVSize == 32 || IVSize == 64) && 1660 "IV size is not compatible with the omp runtime"); 1661 auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1662 : "__kmpc_for_static_init_4u") 1663 : (IVSigned ? "__kmpc_for_static_init_8" 1664 : "__kmpc_for_static_init_8u"); 1665 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1666 auto PtrTy = llvm::PointerType::getUnqual(ITy); 1667 llvm::Type *TypeParams[] = { 1668 getIdentTyPointerTy(), // loc 1669 CGM.Int32Ty, // tid 1670 CGM.Int32Ty, // schedtype 1671 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1672 PtrTy, // p_lower 1673 PtrTy, // p_upper 1674 PtrTy, // p_stride 1675 ITy, // incr 1676 ITy // chunk 1677 }; 1678 llvm::FunctionType *FnTy = 1679 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1680 return CGM.CreateRuntimeFunction(FnTy, Name); 1681 } 1682 1683 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, 1684 bool IVSigned) { 1685 assert((IVSize == 32 || IVSize == 64) && 1686 "IV size is not compatible with the omp runtime"); 1687 auto Name = 1688 IVSize == 32 1689 ? (IVSigned ? 
"__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1690 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1691 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1692 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1693 CGM.Int32Ty, // tid 1694 CGM.Int32Ty, // schedtype 1695 ITy, // lower 1696 ITy, // upper 1697 ITy, // stride 1698 ITy // chunk 1699 }; 1700 llvm::FunctionType *FnTy = 1701 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1702 return CGM.CreateRuntimeFunction(FnTy, Name); 1703 } 1704 1705 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, 1706 bool IVSigned) { 1707 assert((IVSize == 32 || IVSize == 64) && 1708 "IV size is not compatible with the omp runtime"); 1709 auto Name = 1710 IVSize == 32 1711 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1712 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1713 llvm::Type *TypeParams[] = { 1714 getIdentTyPointerTy(), // loc 1715 CGM.Int32Ty, // tid 1716 }; 1717 llvm::FunctionType *FnTy = 1718 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1719 return CGM.CreateRuntimeFunction(FnTy, Name); 1720 } 1721 1722 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, 1723 bool IVSigned) { 1724 assert((IVSize == 32 || IVSize == 64) && 1725 "IV size is not compatible with the omp runtime"); 1726 auto Name = 1727 IVSize == 32 1728 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1729 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1730 auto ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1731 auto PtrTy = llvm::PointerType::getUnqual(ITy); 1732 llvm::Type *TypeParams[] = { 1733 getIdentTyPointerTy(), // loc 1734 CGM.Int32Ty, // tid 1735 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1736 PtrTy, // p_lower 1737 PtrTy, // p_upper 1738 PtrTy // p_stride 1739 }; 1740 llvm::FunctionType *FnTy = 1741 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1742 return CGM.CreateRuntimeFunction(FnTy, Name); 1743 } 1744 1745 llvm::Constant * 1746 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1747 assert(!CGM.getLangOpts().OpenMPUseTLS || 1748 !CGM.getContext().getTargetInfo().isTLSSupported()); 1749 // Lookup the entry, lazily creating it if necessary. 1750 return getOrCreateInternalVariable(CGM.Int8PtrPtrTy, 1751 Twine(CGM.getMangledName(VD)) + ".cache."); 1752 } 1753 1754 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1755 const VarDecl *VD, 1756 Address VDAddr, 1757 SourceLocation Loc) { 1758 if (CGM.getLangOpts().OpenMPUseTLS && 1759 CGM.getContext().getTargetInfo().isTLSSupported()) 1760 return VDAddr; 1761 1762 auto VarTy = VDAddr.getElementType(); 1763 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1764 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1765 CGM.Int8PtrTy), 1766 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1767 getOrCreateThreadPrivateCache(VD)}; 1768 return Address(CGF.EmitRuntimeCall( 1769 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 1770 VDAddr.getAlignment()); 1771 } 1772 1773 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1774 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1775 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1776 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1777 // library. 
1778 auto OMPLoc = emitUpdateLocation(CGF, Loc); 1779 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1780 OMPLoc); 1781 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1782 // to register constructor/destructor for variable. 1783 llvm::Value *Args[] = {OMPLoc, 1784 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1785 CGM.VoidPtrTy), 1786 Ctor, CopyCtor, Dtor}; 1787 CGF.EmitRuntimeCall( 1788 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 1789 } 1790 1791 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1792 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1793 bool PerformInit, CodeGenFunction *CGF) { 1794 if (CGM.getLangOpts().OpenMPUseTLS && 1795 CGM.getContext().getTargetInfo().isTLSSupported()) 1796 return nullptr; 1797 1798 VD = VD->getDefinition(CGM.getContext()); 1799 if (VD && ThreadPrivateWithDefinition.count(VD) == 0) { 1800 ThreadPrivateWithDefinition.insert(VD); 1801 QualType ASTTy = VD->getType(); 1802 1803 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1804 auto Init = VD->getAnyInitializer(); 1805 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1806 // Generate function that re-emits the declaration's initializer into the 1807 // threadprivate copy of the variable VD 1808 CodeGenFunction CtorCGF(CGM); 1809 FunctionArgList Args; 1810 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 1811 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 1812 Args.push_back(&Dst); 1813 1814 auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1815 CGM.getContext().VoidPtrTy, Args); 1816 auto FTy = CGM.getTypes().GetFunctionType(FI); 1817 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 1818 FTy, ".__kmpc_global_ctor_.", FI, Loc); 1819 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1820 Args, SourceLocation()); 1821 auto ArgVal = CtorCGF.EmitLoadOfScalar( 1822 CtorCGF.GetAddrOfLocalVar(&Dst), 
/*Volatile=*/false, 1823 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1824 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1825 Arg = CtorCGF.Builder.CreateElementBitCast(Arg, 1826 CtorCGF.ConvertTypeForMem(ASTTy)); 1827 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1828 /*IsInitializer=*/true); 1829 ArgVal = CtorCGF.EmitLoadOfScalar( 1830 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1831 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1832 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1833 CtorCGF.FinishFunction(); 1834 Ctor = Fn; 1835 } 1836 if (VD->getType().isDestructedType() != QualType::DK_none) { 1837 // Generate function that emits destructor call for the threadprivate copy 1838 // of the variable VD 1839 CodeGenFunction DtorCGF(CGM); 1840 FunctionArgList Args; 1841 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 1842 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 1843 Args.push_back(&Dst); 1844 1845 auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1846 CGM.getContext().VoidTy, Args); 1847 auto FTy = CGM.getTypes().GetFunctionType(FI); 1848 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 1849 FTy, ".__kmpc_global_dtor_.", FI, Loc); 1850 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1851 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1852 SourceLocation()); 1853 // Create a scope with an artificial location for the body of this function. 
1854 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1855 auto ArgVal = DtorCGF.EmitLoadOfScalar( 1856 DtorCGF.GetAddrOfLocalVar(&Dst), 1857 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1858 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 1859 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1860 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1861 DtorCGF.FinishFunction(); 1862 Dtor = Fn; 1863 } 1864 // Do not emit init function if it is not required. 1865 if (!Ctor && !Dtor) 1866 return nullptr; 1867 1868 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1869 auto CopyCtorTy = 1870 llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1871 /*isVarArg=*/false)->getPointerTo(); 1872 // Copying constructor for the threadprivate variable. 1873 // Must be NULL - reserved by runtime, but currently it requires that this 1874 // parameter is always NULL. Otherwise it fires assertion. 1875 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1876 if (Ctor == nullptr) { 1877 auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1878 /*isVarArg=*/false)->getPointerTo(); 1879 Ctor = llvm::Constant::getNullValue(CtorTy); 1880 } 1881 if (Dtor == nullptr) { 1882 auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1883 /*isVarArg=*/false)->getPointerTo(); 1884 Dtor = llvm::Constant::getNullValue(DtorTy); 1885 } 1886 if (!CGF) { 1887 auto InitFunctionTy = 1888 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1889 auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( 1890 InitFunctionTy, ".__omp_threadprivate_init_.", 1891 CGM.getTypes().arrangeNullaryFunction()); 1892 CodeGenFunction InitCGF(CGM); 1893 FunctionArgList ArgList; 1894 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1895 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1896 Loc); 1897 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1898 InitCGF.FinishFunction(); 
1899 return InitFunction; 1900 } 1901 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1902 } 1903 return nullptr; 1904 } 1905 1906 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen 1907 /// function. Here is the logic: 1908 /// if (Cond) { 1909 /// ThenGen(); 1910 /// } else { 1911 /// ElseGen(); 1912 /// } 1913 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 1914 const RegionCodeGenTy &ThenGen, 1915 const RegionCodeGenTy &ElseGen) { 1916 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 1917 1918 // If the condition constant folds and can be elided, try to avoid emitting 1919 // the condition and the dead arm of the if/else. 1920 bool CondConstant; 1921 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 1922 if (CondConstant) 1923 ThenGen(CGF); 1924 else 1925 ElseGen(CGF); 1926 return; 1927 } 1928 1929 // Otherwise, the condition did not fold, or we couldn't elide it. Just 1930 // emit the conditional branch. 1931 auto ThenBlock = CGF.createBasicBlock("omp_if.then"); 1932 auto ElseBlock = CGF.createBasicBlock("omp_if.else"); 1933 auto ContBlock = CGF.createBasicBlock("omp_if.end"); 1934 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 1935 1936 // Emit the 'then' code. 1937 CGF.EmitBlock(ThenBlock); 1938 ThenGen(CGF); 1939 CGF.EmitBranch(ContBlock); 1940 // Emit the 'else' code if present. 1941 // There is no need to emit line number for unconditional branch. 1942 (void)ApplyDebugLocation::CreateEmpty(CGF); 1943 CGF.EmitBlock(ElseBlock); 1944 ElseGen(CGF); 1945 // There is no need to emit line number for unconditional branch. 1946 (void)ApplyDebugLocation::CreateEmpty(CGF); 1947 CGF.EmitBranch(ContBlock); 1948 // Emit the continuation block for code after the if. 
1949 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 1950 } 1951 1952 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 1953 llvm::Value *OutlinedFn, 1954 ArrayRef<llvm::Value *> CapturedVars, 1955 const Expr *IfCond) { 1956 if (!CGF.HaveInsertPoint()) 1957 return; 1958 auto *RTLoc = emitUpdateLocation(CGF, Loc); 1959 auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, 1960 PrePostActionTy &) { 1961 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 1962 auto &RT = CGF.CGM.getOpenMPRuntime(); 1963 llvm::Value *Args[] = { 1964 RTLoc, 1965 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 1966 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 1967 llvm::SmallVector<llvm::Value *, 16> RealArgs; 1968 RealArgs.append(std::begin(Args), std::end(Args)); 1969 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 1970 1971 auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); 1972 CGF.EmitRuntimeCall(RTLFn, RealArgs); 1973 }; 1974 auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, 1975 PrePostActionTy &) { 1976 auto &RT = CGF.CGM.getOpenMPRuntime(); 1977 auto ThreadID = RT.getThreadID(CGF, Loc); 1978 // Build calls: 1979 // __kmpc_serialized_parallel(&Loc, GTid); 1980 llvm::Value *Args[] = {RTLoc, ThreadID}; 1981 CGF.EmitRuntimeCall( 1982 RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); 1983 1984 // OutlinedFn(>id, &zero, CapturedStruct); 1985 auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 1986 Address ZeroAddr = 1987 CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), 1988 /*Name*/ ".zero.addr"); 1989 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 1990 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 1991 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 1992 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 1993 OutlinedFnArgs.append(CapturedVars.begin(), 
CapturedVars.end()); 1994 CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); 1995 1996 // __kmpc_end_serialized_parallel(&Loc, GTid); 1997 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 1998 CGF.EmitRuntimeCall( 1999 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 2000 EndArgs); 2001 }; 2002 if (IfCond) 2003 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 2004 else { 2005 RegionCodeGenTy ThenRCG(ThenGen); 2006 ThenRCG(CGF); 2007 } 2008 } 2009 2010 // If we're inside an (outlined) parallel region, use the region info's 2011 // thread-ID variable (it is passed in a first argument of the outlined function 2012 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2013 // regular serial code region, get thread ID by calling kmp_int32 2014 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2015 // return the address of that temp. 2016 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2017 SourceLocation Loc) { 2018 if (auto *OMPRegionInfo = 2019 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2020 if (OMPRegionInfo->getThreadIDVariable()) 2021 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 2022 2023 auto ThreadID = getThreadID(CGF, Loc); 2024 auto Int32Ty = 2025 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2026 auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2027 CGF.EmitStoreOfScalar(ThreadID, 2028 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2029 2030 return ThreadIDTemp; 2031 } 2032 2033 llvm::Constant * 2034 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, 2035 const llvm::Twine &Name) { 2036 SmallString<256> Buffer; 2037 llvm::raw_svector_ostream Out(Buffer); 2038 Out << Name; 2039 auto RuntimeName = Out.str(); 2040 auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first; 2041 if (Elem.second) { 2042 
assert(Elem.second->getType()->getPointerElementType() == Ty && 2043 "OMP internal variable has different type than requested"); 2044 return &*Elem.second; 2045 } 2046 2047 return Elem.second = new llvm::GlobalVariable( 2048 CGM.getModule(), Ty, /*IsConstant*/ false, 2049 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2050 Elem.first()); 2051 } 2052 2053 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2054 llvm::Twine Name(".gomp_critical_user_", CriticalName); 2055 return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); 2056 } 2057 2058 namespace { 2059 /// Common pre(post)-action for different OpenMP constructs. 2060 class CommonActionTy final : public PrePostActionTy { 2061 llvm::Value *EnterCallee; 2062 ArrayRef<llvm::Value *> EnterArgs; 2063 llvm::Value *ExitCallee; 2064 ArrayRef<llvm::Value *> ExitArgs; 2065 bool Conditional; 2066 llvm::BasicBlock *ContBlock = nullptr; 2067 2068 public: 2069 CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs, 2070 llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs, 2071 bool Conditional = false) 2072 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2073 ExitArgs(ExitArgs), Conditional(Conditional) {} 2074 void Enter(CodeGenFunction &CGF) override { 2075 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2076 if (Conditional) { 2077 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2078 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2079 ContBlock = CGF.createBasicBlock("omp_if.end"); 2080 // Generate the branch (If-stmt) 2081 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2082 CGF.EmitBlock(ThenBlock); 2083 } 2084 } 2085 void Done(CodeGenFunction &CGF) { 2086 // Emit the rest of blocks/branches 2087 CGF.EmitBranch(ContBlock); 2088 CGF.EmitBlock(ContBlock, true); 2089 } 2090 void Exit(CodeGenFunction &CGF) override { 2091 CGF.EmitRuntimeCall(ExitCallee, 
ExitArgs); 2092 } 2093 }; 2094 } // anonymous namespace 2095 2096 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2097 StringRef CriticalName, 2098 const RegionCodeGenTy &CriticalOpGen, 2099 SourceLocation Loc, const Expr *Hint) { 2100 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2101 // CriticalOpGen(); 2102 // __kmpc_end_critical(ident_t *, gtid, Lock); 2103 // Prepare arguments and build a call to __kmpc_critical 2104 if (!CGF.HaveInsertPoint()) 2105 return; 2106 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2107 getCriticalRegionLock(CriticalName)}; 2108 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2109 std::end(Args)); 2110 if (Hint) { 2111 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2112 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); 2113 } 2114 CommonActionTy Action( 2115 createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint 2116 : OMPRTL__kmpc_critical), 2117 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); 2118 CriticalOpGen.setAction(Action); 2119 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2120 } 2121 2122 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2123 const RegionCodeGenTy &MasterOpGen, 2124 SourceLocation Loc) { 2125 if (!CGF.HaveInsertPoint()) 2126 return; 2127 // if(__kmpc_master(ident_t *, gtid)) { 2128 // MasterOpGen(); 2129 // __kmpc_end_master(ident_t *, gtid); 2130 // } 2131 // Prepare arguments and build a call to __kmpc_master 2132 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2133 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, 2134 createRuntimeFunction(OMPRTL__kmpc_end_master), Args, 2135 /*Conditional=*/true); 2136 MasterOpGen.setAction(Action); 2137 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2138 Action.Done(CGF); 2139 } 2140 2141 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2142 SourceLocation Loc) { 
2143 if (!CGF.HaveInsertPoint()) 2144 return; 2145 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2146 llvm::Value *Args[] = { 2147 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2148 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2149 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 2150 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2151 Region->emitUntiedSwitch(CGF); 2152 } 2153 2154 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2155 const RegionCodeGenTy &TaskgroupOpGen, 2156 SourceLocation Loc) { 2157 if (!CGF.HaveInsertPoint()) 2158 return; 2159 // __kmpc_taskgroup(ident_t *, gtid); 2160 // TaskgroupOpGen(); 2161 // __kmpc_end_taskgroup(ident_t *, gtid); 2162 // Prepare arguments and build a call to __kmpc_taskgroup 2163 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2164 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, 2165 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 2166 Args); 2167 TaskgroupOpGen.setAction(Action); 2168 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2169 } 2170 2171 /// Given an array of pointers to variables, project the address of a 2172 /// given variable. 2173 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2174 unsigned Index, const VarDecl *Var) { 2175 // Pull out the pointer to the variable. 
2176 Address PtrAddr = 2177 CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize()); 2178 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2179 2180 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 2181 Addr = CGF.Builder.CreateElementBitCast( 2182 Addr, CGF.ConvertTypeForMem(Var->getType())); 2183 return Addr; 2184 } 2185 2186 static llvm::Value *emitCopyprivateCopyFunction( 2187 CodeGenModule &CGM, llvm::Type *ArgsType, 2188 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2189 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) { 2190 auto &C = CGM.getContext(); 2191 // void copy_func(void *LHSArg, void *RHSArg); 2192 FunctionArgList Args; 2193 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 2194 C.VoidPtrTy); 2195 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 2196 C.VoidPtrTy); 2197 Args.push_back(&LHSArg); 2198 Args.push_back(&RHSArg); 2199 auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2200 auto *Fn = llvm::Function::Create( 2201 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 2202 ".omp.copyprivate.copy_func", &CGM.getModule()); 2203 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); 2204 CodeGenFunction CGF(CGM); 2205 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 2206 // Dest = (void*[n])(LHSArg); 2207 // Src = (void*[n])(RHSArg); 2208 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2209 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2210 ArgsType), CGF.getPointerAlign()); 2211 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2212 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2213 ArgsType), CGF.getPointerAlign()); 2214 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 2215 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2216 // ... 
2217 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2218 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2219 auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2220 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2221 2222 auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2223 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2224 2225 auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2226 QualType Type = VD->getType(); 2227 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2228 } 2229 CGF.FinishFunction(); 2230 return Fn; 2231 } 2232 2233 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2234 const RegionCodeGenTy &SingleOpGen, 2235 SourceLocation Loc, 2236 ArrayRef<const Expr *> CopyprivateVars, 2237 ArrayRef<const Expr *> SrcExprs, 2238 ArrayRef<const Expr *> DstExprs, 2239 ArrayRef<const Expr *> AssignmentOps) { 2240 if (!CGF.HaveInsertPoint()) 2241 return; 2242 assert(CopyprivateVars.size() == SrcExprs.size() && 2243 CopyprivateVars.size() == DstExprs.size() && 2244 CopyprivateVars.size() == AssignmentOps.size()); 2245 auto &C = CGM.getContext(); 2246 // int32 did_it = 0; 2247 // if(__kmpc_single(ident_t *, gtid)) { 2248 // SingleOpGen(); 2249 // __kmpc_end_single(ident_t *, gtid); 2250 // did_it = 1; 2251 // } 2252 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2253 // <copy_func>, did_it); 2254 2255 Address DidIt = Address::invalid(); 2256 if (!CopyprivateVars.empty()) { 2257 // int32 did_it = 0; 2258 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2259 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2260 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2261 } 2262 // Prepare arguments and build a call to __kmpc_single 2263 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2264 CommonActionTy 
Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, 2265 createRuntimeFunction(OMPRTL__kmpc_end_single), Args, 2266 /*Conditional=*/true); 2267 SingleOpGen.setAction(Action); 2268 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2269 if (DidIt.isValid()) { 2270 // did_it = 1; 2271 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2272 } 2273 Action.Done(CGF); 2274 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2275 // <copy_func>, did_it); 2276 if (DidIt.isValid()) { 2277 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2278 auto CopyprivateArrayTy = 2279 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 2280 /*IndexTypeQuals=*/0); 2281 // Create a list of all private variables for copyprivate. 2282 Address CopyprivateList = 2283 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2284 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2285 Address Elem = CGF.Builder.CreateConstArrayGEP( 2286 CopyprivateList, I, CGF.getPointerSize()); 2287 CGF.Builder.CreateStore( 2288 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2289 CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), 2290 Elem); 2291 } 2292 // Build function that copies private values from single region to all other 2293 // threads in the corresponding parallel region. 
2294 auto *CpyFn = emitCopyprivateCopyFunction( 2295 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 2296 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps); 2297 auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2298 Address CL = 2299 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 2300 CGF.VoidPtrTy); 2301 auto *DidItVal = CGF.Builder.CreateLoad(DidIt); 2302 llvm::Value *Args[] = { 2303 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2304 getThreadID(CGF, Loc), // i32 <gtid> 2305 BufSize, // size_t <buf_size> 2306 CL.getPointer(), // void *<copyprivate list> 2307 CpyFn, // void (*) (void *, void *) <copy_func> 2308 DidItVal // i32 did_it 2309 }; 2310 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 2311 } 2312 } 2313 2314 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2315 const RegionCodeGenTy &OrderedOpGen, 2316 SourceLocation Loc, bool IsThreads) { 2317 if (!CGF.HaveInsertPoint()) 2318 return; 2319 // __kmpc_ordered(ident_t *, gtid); 2320 // OrderedOpGen(); 2321 // __kmpc_end_ordered(ident_t *, gtid); 2322 // Prepare arguments and build a call to __kmpc_ordered 2323 if (IsThreads) { 2324 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2325 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, 2326 createRuntimeFunction(OMPRTL__kmpc_end_ordered), 2327 Args); 2328 OrderedOpGen.setAction(Action); 2329 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2330 return; 2331 } 2332 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2333 } 2334 2335 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2336 OpenMPDirectiveKind Kind, bool EmitChecks, 2337 bool ForceSimpleCall) { 2338 if (!CGF.HaveInsertPoint()) 2339 return; 2340 // Build call __kmpc_cancel_barrier(loc, thread_id); 2341 // Build call __kmpc_barrier(loc, thread_id); 2342 unsigned Flags; 2343 if (Kind == OMPD_for) 2344 Flags = 
OMP_IDENT_BARRIER_IMPL_FOR; 2345 else if (Kind == OMPD_sections) 2346 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2347 else if (Kind == OMPD_single) 2348 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2349 else if (Kind == OMPD_barrier) 2350 Flags = OMP_IDENT_BARRIER_EXPL; 2351 else 2352 Flags = OMP_IDENT_BARRIER_IMPL; 2353 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2354 // thread_id); 2355 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2356 getThreadID(CGF, Loc)}; 2357 if (auto *OMPRegionInfo = 2358 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 2359 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2360 auto *Result = CGF.EmitRuntimeCall( 2361 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 2362 if (EmitChecks) { 2363 // if (__kmpc_cancel_barrier()) { 2364 // exit from construct; 2365 // } 2366 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2367 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 2368 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 2369 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2370 CGF.EmitBlock(ExitBB); 2371 // exit from construct; 2372 auto CancelDestination = 2373 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2374 CGF.EmitBranchThroughCleanup(CancelDestination); 2375 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2376 } 2377 return; 2378 } 2379 } 2380 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 2381 } 2382 2383 /// \brief Map the OpenMP loop schedule to the runtime enumeration. 2384 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2385 bool Chunked, bool Ordered) { 2386 switch (ScheduleKind) { 2387 case OMPC_SCHEDULE_static: 2388 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2389 : (Ordered ? OMP_ord_static : OMP_sch_static); 2390 case OMPC_SCHEDULE_dynamic: 2391 return Ordered ? 
OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2392 case OMPC_SCHEDULE_guided: 2393 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2394 case OMPC_SCHEDULE_runtime: 2395 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2396 case OMPC_SCHEDULE_auto: 2397 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2398 case OMPC_SCHEDULE_unknown: 2399 assert(!Chunked && "chunk was specified but schedule kind not known"); 2400 return Ordered ? OMP_ord_static : OMP_sch_static; 2401 } 2402 llvm_unreachable("Unexpected runtime schedule"); 2403 } 2404 2405 /// \brief Map the OpenMP distribute schedule to the runtime enumeration. 2406 static OpenMPSchedType 2407 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2408 // only static is allowed for dist_schedule 2409 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2410 } 2411 2412 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2413 bool Chunked) const { 2414 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2415 return Schedule == OMP_sch_static; 2416 } 2417 2418 bool CGOpenMPRuntime::isStaticNonchunked( 2419 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2420 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2421 return Schedule == OMP_dist_sch_static; 2422 } 2423 2424 2425 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2426 auto Schedule = 2427 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2428 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2429 return Schedule != OMP_sch_static; 2430 } 2431 2432 static int addMonoNonMonoModifier(OpenMPSchedType Schedule, 2433 OpenMPScheduleClauseModifier M1, 2434 OpenMPScheduleClauseModifier M2) { 2435 int Modifier = 0; 2436 switch (M1) { 2437 case OMPC_SCHEDULE_MODIFIER_monotonic: 2438 Modifier = OMP_sch_modifier_monotonic; 2439 break; 2440 case 
OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2441 Modifier = OMP_sch_modifier_nonmonotonic; 2442 break; 2443 case OMPC_SCHEDULE_MODIFIER_simd: 2444 if (Schedule == OMP_sch_static_chunked) 2445 Schedule = OMP_sch_static_balanced_chunked; 2446 break; 2447 case OMPC_SCHEDULE_MODIFIER_last: 2448 case OMPC_SCHEDULE_MODIFIER_unknown: 2449 break; 2450 } 2451 switch (M2) { 2452 case OMPC_SCHEDULE_MODIFIER_monotonic: 2453 Modifier = OMP_sch_modifier_monotonic; 2454 break; 2455 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2456 Modifier = OMP_sch_modifier_nonmonotonic; 2457 break; 2458 case OMPC_SCHEDULE_MODIFIER_simd: 2459 if (Schedule == OMP_sch_static_chunked) 2460 Schedule = OMP_sch_static_balanced_chunked; 2461 break; 2462 case OMPC_SCHEDULE_MODIFIER_last: 2463 case OMPC_SCHEDULE_MODIFIER_unknown: 2464 break; 2465 } 2466 return Schedule | Modifier; 2467 } 2468 2469 void CGOpenMPRuntime::emitForDispatchInit( 2470 CodeGenFunction &CGF, SourceLocation Loc, 2471 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2472 bool Ordered, const DispatchRTInput &DispatchValues) { 2473 if (!CGF.HaveInsertPoint()) 2474 return; 2475 OpenMPSchedType Schedule = getRuntimeSchedule( 2476 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2477 assert(Ordered || 2478 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2479 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2480 Schedule != OMP_sch_static_balanced_chunked)); 2481 // Call __kmpc_dispatch_init( 2482 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2483 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2484 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2485 2486 // If the Chunk was not specified in the clause - use default value 1. 2487 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 2488 : CGF.Builder.getIntN(IVSize, 1); 2489 llvm::Value *Args[] = { 2490 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2491 CGF.Builder.getInt32(addMonoNonMonoModifier( 2492 Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2493 DispatchValues.LB, // Lower 2494 DispatchValues.UB, // Upper 2495 CGF.Builder.getIntN(IVSize, 1), // Stride 2496 Chunk // Chunk 2497 }; 2498 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2499 } 2500 2501 static void emitForStaticInitCall( 2502 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2503 llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule, 2504 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2505 unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB, 2506 Address ST, llvm::Value *Chunk) { 2507 if (!CGF.HaveInsertPoint()) 2508 return; 2509 2510 assert(!Ordered); 2511 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2512 Schedule == OMP_sch_static_balanced_chunked || 2513 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2514 Schedule == OMP_dist_sch_static || 2515 Schedule == OMP_dist_sch_static_chunked); 2516 2517 // Call __kmpc_for_static_init( 2518 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2519 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2520 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2521 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2522 if (Chunk == nullptr) { 2523 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2524 Schedule == OMP_dist_sch_static) && 2525 "expected static non-chunked schedule"); 2526 // If the Chunk was not specified in the clause - use default value 1. 
2527 Chunk = CGF.Builder.getIntN(IVSize, 1); 2528 } else { 2529 assert((Schedule == OMP_sch_static_chunked || 2530 Schedule == OMP_sch_static_balanced_chunked || 2531 Schedule == OMP_ord_static_chunked || 2532 Schedule == OMP_dist_sch_static_chunked) && 2533 "expected static chunked schedule"); 2534 } 2535 llvm::Value *Args[] = { 2536 UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier( 2537 Schedule, M1, M2)), // Schedule type 2538 IL.getPointer(), // &isLastIter 2539 LB.getPointer(), // &LB 2540 UB.getPointer(), // &UB 2541 ST.getPointer(), // &Stride 2542 CGF.Builder.getIntN(IVSize, 1), // Incr 2543 Chunk // Chunk 2544 }; 2545 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2546 } 2547 2548 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2549 SourceLocation Loc, 2550 const OpenMPScheduleTy &ScheduleKind, 2551 unsigned IVSize, bool IVSigned, 2552 bool Ordered, Address IL, Address LB, 2553 Address UB, Address ST, 2554 llvm::Value *Chunk) { 2555 OpenMPSchedType ScheduleNum = 2556 getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); 2557 auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); 2558 auto *ThreadId = getThreadID(CGF, Loc); 2559 auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); 2560 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2561 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize, 2562 Ordered, IL, LB, UB, ST, Chunk); 2563 } 2564 2565 void CGOpenMPRuntime::emitDistributeStaticInit( 2566 CodeGenFunction &CGF, SourceLocation Loc, 2567 OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned, 2568 bool Ordered, Address IL, Address LB, Address UB, Address ST, 2569 llvm::Value *Chunk) { 2570 OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr); 2571 auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); 2572 auto *ThreadId = getThreadID(CGF, Loc); 2573 auto *StaticInitFunction = createForStaticInitFunction(IVSize, 
IVSigned); 2574 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2575 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2576 OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB, 2577 UB, ST, Chunk); 2578 } 2579 2580 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2581 SourceLocation Loc) { 2582 if (!CGF.HaveInsertPoint()) 2583 return; 2584 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2585 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2586 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 2587 Args); 2588 } 2589 2590 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2591 SourceLocation Loc, 2592 unsigned IVSize, 2593 bool IVSigned) { 2594 if (!CGF.HaveInsertPoint()) 2595 return; 2596 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2597 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2598 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2599 } 2600 2601 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2602 SourceLocation Loc, unsigned IVSize, 2603 bool IVSigned, Address IL, 2604 Address LB, Address UB, 2605 Address ST) { 2606 // Call __kmpc_dispatch_next( 2607 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2608 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2609 // kmp_int[32|64] *p_stride); 2610 llvm::Value *Args[] = { 2611 emitUpdateLocation(CGF, Loc), 2612 getThreadID(CGF, Loc), 2613 IL.getPointer(), // &isLastIter 2614 LB.getPointer(), // &Lower 2615 UB.getPointer(), // &Upper 2616 ST.getPointer() // &Stride 2617 }; 2618 llvm::Value *Call = 2619 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2620 return CGF.EmitScalarConversion( 2621 Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true), 2622 CGF.getContext().BoolTy, Loc); 2623 } 2624 2625 void 
CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2626 llvm::Value *NumThreads, 2627 SourceLocation Loc) { 2628 if (!CGF.HaveInsertPoint()) 2629 return; 2630 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2631 llvm::Value *Args[] = { 2632 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2633 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 2634 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 2635 Args); 2636 } 2637 2638 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2639 OpenMPProcBindClauseKind ProcBind, 2640 SourceLocation Loc) { 2641 if (!CGF.HaveInsertPoint()) 2642 return; 2643 // Constants for proc bind value accepted by the runtime. 2644 enum ProcBindTy { 2645 ProcBindFalse = 0, 2646 ProcBindTrue, 2647 ProcBindMaster, 2648 ProcBindClose, 2649 ProcBindSpread, 2650 ProcBindIntel, 2651 ProcBindDefault 2652 } RuntimeProcBind; 2653 switch (ProcBind) { 2654 case OMPC_PROC_BIND_master: 2655 RuntimeProcBind = ProcBindMaster; 2656 break; 2657 case OMPC_PROC_BIND_close: 2658 RuntimeProcBind = ProcBindClose; 2659 break; 2660 case OMPC_PROC_BIND_spread: 2661 RuntimeProcBind = ProcBindSpread; 2662 break; 2663 case OMPC_PROC_BIND_unknown: 2664 llvm_unreachable("Unsupported proc_bind value."); 2665 } 2666 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2667 llvm::Value *Args[] = { 2668 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2669 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 2670 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 2671 } 2672 2673 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2674 SourceLocation Loc) { 2675 if (!CGF.HaveInsertPoint()) 2676 return; 2677 // Build call void __kmpc_flush(ident_t *loc) 2678 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 2679 emitUpdateLocation(CGF, Loc)); 2680 } 2681 2682 namespace { 2683 /// \brief 
Indexes of fields for type kmp_task_t. 2684 enum KmpTaskTFields { 2685 /// \brief List of shared variables. 2686 KmpTaskTShareds, 2687 /// \brief Task routine. 2688 KmpTaskTRoutine, 2689 /// \brief Partition id for the untied tasks. 2690 KmpTaskTPartId, 2691 /// Function with call of destructors for private variables. 2692 Data1, 2693 /// Task priority. 2694 Data2, 2695 /// (Taskloops only) Lower bound. 2696 KmpTaskTLowerBound, 2697 /// (Taskloops only) Upper bound. 2698 KmpTaskTUpperBound, 2699 /// (Taskloops only) Stride. 2700 KmpTaskTStride, 2701 /// (Taskloops only) Is last iteration flag. 2702 KmpTaskTLastIter, 2703 }; 2704 } // anonymous namespace 2705 2706 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2707 // FIXME: Add other entries type when they become supported. 2708 return OffloadEntriesTargetRegion.empty(); 2709 } 2710 2711 /// \brief Initialize target region entry. 2712 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2713 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2714 StringRef ParentName, unsigned LineNum, 2715 unsigned Order) { 2716 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2717 "only required for the device " 2718 "code generation."); 2719 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2720 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 2721 /*Flags=*/0); 2722 ++OffloadingEntriesNum; 2723 } 2724 2725 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2726 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2727 StringRef ParentName, unsigned LineNum, 2728 llvm::Constant *Addr, llvm::Constant *ID, 2729 int32_t Flags) { 2730 // If we are emitting code for a target, the entry is already initialized, 2731 // only has to be registered. 
2732 if (CGM.getLangOpts().OpenMPIsDevice) { 2733 assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 2734 "Entry must exist."); 2735 auto &Entry = 2736 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 2737 assert(Entry.isValid() && "Entry not initialized!"); 2738 Entry.setAddress(Addr); 2739 Entry.setID(ID); 2740 Entry.setFlags(Flags); 2741 return; 2742 } else { 2743 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID, Flags); 2744 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 2745 } 2746 } 2747 2748 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 2749 unsigned DeviceID, unsigned FileID, StringRef ParentName, 2750 unsigned LineNum) const { 2751 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 2752 if (PerDevice == OffloadEntriesTargetRegion.end()) 2753 return false; 2754 auto PerFile = PerDevice->second.find(FileID); 2755 if (PerFile == PerDevice->second.end()) 2756 return false; 2757 auto PerParentName = PerFile->second.find(ParentName); 2758 if (PerParentName == PerFile->second.end()) 2759 return false; 2760 auto PerLine = PerParentName->second.find(LineNum); 2761 if (PerLine == PerParentName->second.end()) 2762 return false; 2763 // Fail if this entry is already registered. 2764 if (PerLine->second.getAddress() || PerLine->second.getID()) 2765 return false; 2766 return true; 2767 } 2768 2769 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 2770 const OffloadTargetRegionEntryInfoActTy &Action) { 2771 // Scan all target region entries and perform the provided action. 2772 for (auto &D : OffloadEntriesTargetRegion) 2773 for (auto &F : D.second) 2774 for (auto &P : F.second) 2775 for (auto &L : P.second) 2776 Action(D.first, F.first, P.first(), L.first, L.second); 2777 } 2778 2779 /// \brief Create a Ctor/Dtor-like function whose body is emitted through 2780 /// \a Codegen. 
/// \brief Create a Ctor/Dtor-like function whose body is emitted through
/// \a Codegen. This is used to emit the two functions that register and
/// unregister the descriptor of the current compilation unit.
///
/// \param CGM Module the function is created in.
/// \param Name Name given to the created function.
/// \param Codegen Callback that emits the function body.
/// \return The created function, which returns void and takes one void*
/// parameter.
static llvm::Function *
createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
                                         const RegionCodeGenTy &Codegen) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  // Single unnamed void* parameter; the body emitted here never reads it.
  ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(),
                             /*Id=*/nullptr, C.VoidPtrTy);
  Args.push_back(&DummyPtr);

  CodeGenFunction CGF(CGM);
  auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto FTy = CGM.getTypes().GetFunctionType(FI);
  auto *Fn =
      CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
  Codegen(CGF);
  CGF.FinishFunction();
  return Fn;
}

/// \brief Build the offloading binary descriptor and the function that
/// registers it.
///
/// Creates the externally-defined begin/end symbols for the host entries and
/// for each device image, the device images array, the descriptor global, and
/// the register/unregister functions. The register function also registers
/// the unregister function as a global destructor.
///
/// \return The registration function, or nullptr when emitting device code or
/// when there are no offload entries.
llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {

  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  auto &M = CGM.getModule();
  auto &C = CGM.getContext();

  // Get list of devices we care about
  auto &Devices = CGM.getLangOpts().OMPTargetTriples;

  // We should be creating an offloading descriptor only if there are devices
  // specified.
  assert(!Devices.empty() && "No OpenMP offloading devices??");

  // Create the external variables that will point to the begin and end of the
  // host entries section. These will be defined by the linker.
  auto *OffloadEntryTy =
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
  llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      ".omp_offloading.entries_begin");
  llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      ".omp_offloading.entries_end");

  // Create all device images
  auto *DeviceImageTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
  ConstantInitBuilder DeviceImagesBuilder(CGM);
  auto DeviceImagesEntries = DeviceImagesBuilder.beginArray(DeviceImageTy);

  // One __tgt_device_image element per target triple; the image start/end
  // symbols are external declarations named after the triple.
  for (unsigned i = 0; i < Devices.size(); ++i) {
    StringRef T = Devices[i].getTriple();
    auto *ImgBegin = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
        /*Initializer=*/nullptr,
        Twine(".omp_offloading.img_start.") + Twine(T));
    auto *ImgEnd = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
        /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T));

    auto Dev = DeviceImagesEntries.beginStruct(DeviceImageTy);
    Dev.add(ImgBegin);
    Dev.add(ImgEnd);
    Dev.add(HostEntriesBegin);
    Dev.add(HostEntriesEnd);
    Dev.finishAndAddTo(DeviceImagesEntries);
  }

  // Create device images global array.
  llvm::GlobalVariable *DeviceImages =
      DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images",
                                                CGM.getPointerAlign(),
                                                /*isConstant=*/true);
  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // This is a Zero array to be used in the creation of the constant expressions
  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
                             llvm::Constant::getNullValue(CGM.Int32Ty)};

  // Create the target region descriptor.
  auto *BinaryDescriptorTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
  ConstantInitBuilder DescBuilder(CGM);
  auto DescInit = DescBuilder.beginStruct(BinaryDescriptorTy);
  DescInit.addInt(CGM.Int32Ty, Devices.size());
  // GEP with two zero indices: pointer to the first device image element.
  DescInit.add(llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
                                                    DeviceImages,
                                                    Index));
  DescInit.add(HostEntriesBegin);
  DescInit.add(HostEntriesEnd);

  auto *Desc = DescInit.finishAndCreateGlobal(".omp_offloading.descriptor",
                                              CGM.getPointerAlign(),
                                              /*isConstant=*/true);

  // Emit code to register or unregister the descriptor at execution
  // startup or closing, respectively.

  // Create a variable to drive the registration and unregistration of the
  // descriptor, so we can reuse the logic that emits Ctors and Dtors.
  auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
  ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
                                IdentInfo, C.CharTy);

  // The unregister function is created first because the register function
  // body refers to it when registering the global destructor.
  auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
      CGM, ".omp_offloading.descriptor_unreg",
      [&](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
                             Desc);
      });
  auto *RegFn = createOffloadingBinaryDescriptorFunction(
      CGM, ".omp_offloading.descriptor_reg",
      [&](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
                             Desc);
        CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
      });
  return RegFn;
}
2936 ConstantInitBuilder EntryBuilder(CGM); 2937 auto EntryInit = EntryBuilder.beginStruct(TgtOffloadEntryType); 2938 EntryInit.add(AddrPtr); 2939 EntryInit.add(StrPtr); 2940 EntryInit.addInt(CGM.SizeTy, Size); 2941 EntryInit.addInt(CGM.Int32Ty, Flags); 2942 EntryInit.addInt(CGM.Int32Ty, 0); 2943 llvm::GlobalVariable *Entry = 2944 EntryInit.finishAndCreateGlobal(".omp_offloading.entry", 2945 Align, 2946 /*constant*/ true, 2947 llvm::GlobalValue::ExternalLinkage); 2948 2949 // The entry has to be created in the section the linker expects it to be. 2950 Entry->setSection(".omp_offloading.entries"); 2951 } 2952 2953 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 2954 // Emit the offloading entries and metadata so that the device codegen side 2955 // can easily figure out what to emit. The produced metadata looks like 2956 // this: 2957 // 2958 // !omp_offload.info = !{!1, ...} 2959 // 2960 // Right now we only generate metadata for function that contain target 2961 // regions. 2962 2963 // If we do not have entries, we dont need to do anything. 2964 if (OffloadEntriesInfoManager.empty()) 2965 return; 2966 2967 llvm::Module &M = CGM.getModule(); 2968 llvm::LLVMContext &C = M.getContext(); 2969 SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> 2970 OrderedEntries(OffloadEntriesInfoManager.size()); 2971 2972 // Create the offloading info metadata node. 2973 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 2974 2975 // Auxiliary methods to create metadata values and strings. 
2976 auto getMDInt = [&](unsigned v) { 2977 return llvm::ConstantAsMetadata::get( 2978 llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v)); 2979 }; 2980 2981 auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); }; 2982 2983 // Create function that emits metadata for each target region entry; 2984 auto &&TargetRegionMetadataEmitter = [&]( 2985 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line, 2986 OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 2987 llvm::SmallVector<llvm::Metadata *, 32> Ops; 2988 // Generate metadata for target regions. Each entry of this metadata 2989 // contains: 2990 // - Entry 0 -> Kind of this type of metadata (0). 2991 // - Entry 1 -> Device ID of the file where the entry was identified. 2992 // - Entry 2 -> File ID of the file where the entry was identified. 2993 // - Entry 3 -> Mangled name of the function where the entry was identified. 2994 // - Entry 4 -> Line in the file where the entry was identified. 2995 // - Entry 5 -> Order the entry was created. 2996 // The first element of the metadata node is the kind. 2997 Ops.push_back(getMDInt(E.getKind())); 2998 Ops.push_back(getMDInt(DeviceID)); 2999 Ops.push_back(getMDInt(FileID)); 3000 Ops.push_back(getMDString(ParentName)); 3001 Ops.push_back(getMDInt(Line)); 3002 Ops.push_back(getMDInt(E.getOrder())); 3003 3004 // Save this entry in the right position of the ordered entries array. 3005 OrderedEntries[E.getOrder()] = &E; 3006 3007 // Add metadata to the named metadata node. 
3008 MD->addOperand(llvm::MDNode::get(C, Ops)); 3009 }; 3010 3011 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3012 TargetRegionMetadataEmitter); 3013 3014 for (auto *E : OrderedEntries) { 3015 assert(E && "All ordered entries must exist!"); 3016 if (auto *CE = 3017 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3018 E)) { 3019 assert(CE->getID() && CE->getAddress() && 3020 "Entry ID and Addr are invalid!"); 3021 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0); 3022 } else 3023 llvm_unreachable("Unsupported entry kind."); 3024 } 3025 } 3026 3027 /// \brief Loads all the offload entries information from the host IR 3028 /// metadata. 3029 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3030 // If we are in target mode, load the metadata from the host IR. This code has 3031 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 3032 3033 if (!CGM.getLangOpts().OpenMPIsDevice) 3034 return; 3035 3036 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3037 return; 3038 3039 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3040 if (Buf.getError()) 3041 return; 3042 3043 llvm::LLVMContext C; 3044 auto ME = expectedToErrorOrAndEmitErrors( 3045 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3046 3047 if (ME.getError()) 3048 return; 3049 3050 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3051 if (!MD) 3052 return; 3053 3054 for (auto I : MD->operands()) { 3055 llvm::MDNode *MN = cast<llvm::MDNode>(I); 3056 3057 auto getMDInt = [&](unsigned Idx) { 3058 llvm::ConstantAsMetadata *V = 3059 cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3060 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3061 }; 3062 3063 auto getMDString = [&](unsigned Idx) { 3064 llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3065 return V->getString(); 3066 }; 3067 3068 switch (getMDInt(0)) { 3069 default: 3070 llvm_unreachable("Unexpected 
metadata!"); 3071 break; 3072 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3073 OFFLOAD_ENTRY_INFO_TARGET_REGION: 3074 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3075 /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2), 3076 /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4), 3077 /*Order=*/getMDInt(5)); 3078 break; 3079 } 3080 } 3081 } 3082 3083 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3084 if (!KmpRoutineEntryPtrTy) { 3085 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3086 auto &C = CGM.getContext(); 3087 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3088 FunctionProtoType::ExtProtoInfo EPI; 3089 KmpRoutineEntryPtrQTy = C.getPointerType( 3090 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3091 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3092 } 3093 } 3094 3095 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 3096 QualType FieldTy) { 3097 auto *Field = FieldDecl::Create( 3098 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 3099 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 3100 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 3101 Field->setAccess(AS_public); 3102 DC->addDecl(Field); 3103 return Field; 3104 } 3105 3106 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3107 3108 // Make sure the type of the entry is already created. This is the type we 3109 // have to create: 3110 // struct __tgt_offload_entry{ 3111 // void *addr; // Pointer to the offload entry info. 3112 // // (function or global) 3113 // char *name; // Name of the function or global. 3114 // size_t size; // Size of the entry info (0 if it a function). 3115 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3116 // int32_t reserved; // Reserved, to use by the runtime library. 
3117 // }; 3118 if (TgtOffloadEntryQTy.isNull()) { 3119 ASTContext &C = CGM.getContext(); 3120 auto *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3121 RD->startDefinition(); 3122 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3123 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3124 addFieldToRecordDecl(C, RD, C.getSizeType()); 3125 addFieldToRecordDecl( 3126 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3127 addFieldToRecordDecl( 3128 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3129 RD->completeDefinition(); 3130 TgtOffloadEntryQTy = C.getRecordType(RD); 3131 } 3132 return TgtOffloadEntryQTy; 3133 } 3134 3135 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { 3136 // These are the types we need to build: 3137 // struct __tgt_device_image{ 3138 // void *ImageStart; // Pointer to the target code start. 3139 // void *ImageEnd; // Pointer to the target code end. 3140 // // We also add the host entries to the device image, as it may be useful 3141 // // for the target runtime to have access to that information. 3142 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all 3143 // // the entries. 3144 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 3145 // // entries (non inclusive). 3146 // }; 3147 if (TgtDeviceImageQTy.isNull()) { 3148 ASTContext &C = CGM.getContext(); 3149 auto *RD = C.buildImplicitRecord("__tgt_device_image"); 3150 RD->startDefinition(); 3151 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3152 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3153 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3154 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3155 RD->completeDefinition(); 3156 TgtDeviceImageQTy = C.getRecordType(RD); 3157 } 3158 return TgtDeviceImageQTy; 3159 } 3160 3161 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { 3162 // struct __tgt_bin_desc{ 3163 // int32_t NumDevices; // Number of devices supported. 
3164 // __tgt_device_image *DeviceImages; // Arrays of device images 3165 // // (one per device). 3166 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the 3167 // // entries. 3168 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 3169 // // entries (non inclusive). 3170 // }; 3171 if (TgtBinaryDescriptorQTy.isNull()) { 3172 ASTContext &C = CGM.getContext(); 3173 auto *RD = C.buildImplicitRecord("__tgt_bin_desc"); 3174 RD->startDefinition(); 3175 addFieldToRecordDecl( 3176 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3177 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); 3178 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3179 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3180 RD->completeDefinition(); 3181 TgtBinaryDescriptorQTy = C.getRecordType(RD); 3182 } 3183 return TgtBinaryDescriptorQTy; 3184 } 3185 3186 namespace { 3187 struct PrivateHelpersTy { 3188 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 3189 const VarDecl *PrivateElemInit) 3190 : Original(Original), PrivateCopy(PrivateCopy), 3191 PrivateElemInit(PrivateElemInit) {} 3192 const VarDecl *Original; 3193 const VarDecl *PrivateCopy; 3194 const VarDecl *PrivateElemInit; 3195 }; 3196 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3197 } // anonymous namespace 3198 3199 static RecordDecl * 3200 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3201 if (!Privates.empty()) { 3202 auto &C = CGM.getContext(); 3203 // Build struct .kmp_privates_t. 
{ 3204 // /* private vars */ 3205 // }; 3206 auto *RD = C.buildImplicitRecord(".kmp_privates.t"); 3207 RD->startDefinition(); 3208 for (auto &&Pair : Privates) { 3209 auto *VD = Pair.second.Original; 3210 auto Type = VD->getType(); 3211 Type = Type.getNonReferenceType(); 3212 auto *FD = addFieldToRecordDecl(C, RD, Type); 3213 if (VD->hasAttrs()) { 3214 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3215 E(VD->getAttrs().end()); 3216 I != E; ++I) 3217 FD->addAttr(*I); 3218 } 3219 } 3220 RD->completeDefinition(); 3221 return RD; 3222 } 3223 return nullptr; 3224 } 3225 3226 static RecordDecl * 3227 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3228 QualType KmpInt32Ty, 3229 QualType KmpRoutineEntryPointerQTy) { 3230 auto &C = CGM.getContext(); 3231 // Build struct kmp_task_t { 3232 // void * shareds; 3233 // kmp_routine_entry_t routine; 3234 // kmp_int32 part_id; 3235 // kmp_cmplrdata_t data1; 3236 // kmp_cmplrdata_t data2; 3237 // For taskloops additional fields: 3238 // kmp_uint64 lb; 3239 // kmp_uint64 ub; 3240 // kmp_int64 st; 3241 // kmp_int32 liter; 3242 // }; 3243 auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3244 UD->startDefinition(); 3245 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3246 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3247 UD->completeDefinition(); 3248 QualType KmpCmplrdataTy = C.getRecordType(UD); 3249 auto *RD = C.buildImplicitRecord("kmp_task_t"); 3250 RD->startDefinition(); 3251 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3252 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3253 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3254 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3255 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3256 if (isOpenMPTaskLoopDirective(Kind)) { 3257 QualType KmpUInt64Ty = 3258 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3259 QualType KmpInt64Ty = 3260 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, 
/*Signed=*/1); 3261 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3262 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3263 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3264 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3265 } 3266 RD->completeDefinition(); 3267 return RD; 3268 } 3269 3270 static RecordDecl * 3271 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3272 ArrayRef<PrivateDataTy> Privates) { 3273 auto &C = CGM.getContext(); 3274 // Build struct kmp_task_t_with_privates { 3275 // kmp_task_t task_data; 3276 // .kmp_privates_t. privates; 3277 // }; 3278 auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3279 RD->startDefinition(); 3280 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3281 if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) { 3282 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3283 } 3284 RD->completeDefinition(); 3285 return RD; 3286 } 3287 3288 /// \brief Emit a proxy function which accepts kmp_task_t as the second 3289 /// argument. 
3290 /// \code 3291 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3292 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3293 /// For taskloops: 3294 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3295 /// tt->shareds); 3296 /// return 0; 3297 /// } 3298 /// \endcode 3299 static llvm::Value * 3300 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3301 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3302 QualType KmpTaskTWithPrivatesPtrQTy, 3303 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3304 QualType SharedsPtrTy, llvm::Value *TaskFunction, 3305 llvm::Value *TaskPrivatesMap) { 3306 auto &C = CGM.getContext(); 3307 FunctionArgList Args; 3308 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); 3309 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, 3310 /*Id=*/nullptr, 3311 KmpTaskTWithPrivatesPtrQTy.withRestrict()); 3312 Args.push_back(&GtidArg); 3313 Args.push_back(&TaskTypeArg); 3314 auto &TaskEntryFnInfo = 3315 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3316 auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3317 auto *TaskEntry = 3318 llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage, 3319 ".omp_task_entry.", &CGM.getModule()); 3320 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo); 3321 CodeGenFunction CGF(CGM); 3322 CGF.disableDebugInfo(); 3323 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); 3324 3325 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3326 // tt, 3327 // For taskloops: 3328 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3329 // tt->task_data.shareds); 3330 auto *GtidParam = CGF.EmitLoadOfScalar( 3331 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3332 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3333 
CGF.GetAddrOfLocalVar(&TaskTypeArg), 3334 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3335 auto *KmpTaskTWithPrivatesQTyRD = 3336 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3337 LValue Base = 3338 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3339 auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3340 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3341 auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3342 auto *PartidParam = PartIdLVal.getPointer(); 3343 3344 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3345 auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3346 auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3347 CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(), 3348 CGF.ConvertTypeForMem(SharedsPtrTy)); 3349 3350 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3351 llvm::Value *PrivatesParam; 3352 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3353 auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3354 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3355 PrivatesLVal.getPointer(), CGF.VoidPtrTy); 3356 } else 3357 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3358 3359 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3360 TaskPrivatesMap, 3361 CGF.Builder 3362 .CreatePointerBitCastOrAddrSpaceCast( 3363 TDBase.getAddress(), CGF.VoidPtrTy) 3364 .getPointer()}; 3365 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3366 std::end(CommonArgs)); 3367 if (isOpenMPTaskLoopDirective(Kind)) { 3368 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3369 auto LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3370 auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal(); 3371 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3372 auto 
UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3373 auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal(); 3374 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3375 auto StLVal = CGF.EmitLValueForField(Base, *StFI); 3376 auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal(); 3377 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3378 auto LILVal = CGF.EmitLValueForField(Base, *LIFI); 3379 auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal(); 3380 CallArgs.push_back(LBParam); 3381 CallArgs.push_back(UBParam); 3382 CallArgs.push_back(StParam); 3383 CallArgs.push_back(LIParam); 3384 } 3385 CallArgs.push_back(SharedsParam); 3386 3387 CGF.EmitCallOrInvoke(TaskFunction, CallArgs); 3388 CGF.EmitStoreThroughLValue( 3389 RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3390 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3391 CGF.FinishFunction(); 3392 return TaskEntry; 3393 } 3394 3395 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3396 SourceLocation Loc, 3397 QualType KmpInt32Ty, 3398 QualType KmpTaskTWithPrivatesPtrQTy, 3399 QualType KmpTaskTWithPrivatesQTy) { 3400 auto &C = CGM.getContext(); 3401 FunctionArgList Args; 3402 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); 3403 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, 3404 /*Id=*/nullptr, 3405 KmpTaskTWithPrivatesPtrQTy.withRestrict()); 3406 Args.push_back(&GtidArg); 3407 Args.push_back(&TaskTypeArg); 3408 FunctionType::ExtInfo Info; 3409 auto &DestructorFnInfo = 3410 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3411 auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo); 3412 auto *DestructorFn = 3413 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3414 ".omp_task_destructor.", &CGM.getModule()); 3415 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn, 3416 DestructorFnInfo); 3417 CodeGenFunction CGF(CGM); 3418 
CGF.disableDebugInfo(); 3419 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3420 Args); 3421 3422 LValue Base = CGF.EmitLoadOfPointerLValue( 3423 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3424 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3425 auto *KmpTaskTWithPrivatesQTyRD = 3426 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3427 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3428 Base = CGF.EmitLValueForField(Base, *FI); 3429 for (auto *Field : 3430 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3431 if (auto DtorKind = Field->getType().isDestructedType()) { 3432 auto FieldLValue = CGF.EmitLValueForField(Base, Field); 3433 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 3434 } 3435 } 3436 CGF.FinishFunction(); 3437 return DestructorFn; 3438 } 3439 3440 /// \brief Emit a privates mapping function for correct handling of private and 3441 /// firstprivate variables. 3442 /// \code 3443 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 3444 /// **noalias priv1,..., <tyn> **noalias privn) { 3445 /// *priv1 = &.privates.priv1; 3446 /// ...; 3447 /// *privn = &.privates.privn; 3448 /// } 3449 /// \endcode 3450 static llvm::Value * 3451 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3452 ArrayRef<const Expr *> PrivateVars, 3453 ArrayRef<const Expr *> FirstprivateVars, 3454 ArrayRef<const Expr *> LastprivateVars, 3455 QualType PrivatesQTy, 3456 ArrayRef<PrivateDataTy> Privates) { 3457 auto &C = CGM.getContext(); 3458 FunctionArgList Args; 3459 ImplicitParamDecl TaskPrivatesArg( 3460 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3461 C.getPointerType(PrivatesQTy).withConst().withRestrict()); 3462 Args.push_back(&TaskPrivatesArg); 3463 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 3464 unsigned Counter = 1; 3465 for (auto *E: PrivateVars) { 3466 Args.push_back(ImplicitParamDecl::Create( 3467 C, /*DC=*/nullptr, Loc, 3468 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 3469 .withConst() 3470 .withRestrict())); 3471 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3472 PrivateVarsPos[VD] = Counter; 3473 ++Counter; 3474 } 3475 for (auto *E : FirstprivateVars) { 3476 Args.push_back(ImplicitParamDecl::Create( 3477 C, /*DC=*/nullptr, Loc, 3478 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 3479 .withConst() 3480 .withRestrict())); 3481 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3482 PrivateVarsPos[VD] = Counter; 3483 ++Counter; 3484 } 3485 for (auto *E: LastprivateVars) { 3486 Args.push_back(ImplicitParamDecl::Create( 3487 C, /*DC=*/nullptr, Loc, 3488 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 3489 .withConst() 3490 .withRestrict())); 3491 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3492 PrivateVarsPos[VD] = Counter; 3493 ++Counter; 3494 } 3495 auto &TaskPrivatesMapFnInfo = 3496 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3497 auto 
*TaskPrivatesMapTy = 3498 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3499 auto *TaskPrivatesMap = llvm::Function::Create( 3500 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, 3501 ".omp_task_privates_map.", &CGM.getModule()); 3502 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap, 3503 TaskPrivatesMapFnInfo); 3504 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3505 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3506 CodeGenFunction CGF(CGM); 3507 CGF.disableDebugInfo(); 3508 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3509 TaskPrivatesMapFnInfo, Args); 3510 3511 // *privi = &.privates.privi; 3512 LValue Base = CGF.EmitLoadOfPointerLValue( 3513 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3514 TaskPrivatesArg.getType()->castAs<PointerType>()); 3515 auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3516 Counter = 0; 3517 for (auto *Field : PrivatesQTyRD->fields()) { 3518 auto FieldLVal = CGF.EmitLValueForField(Base, Field); 3519 auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3520 auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3521 auto RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3522 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 3523 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 3524 ++Counter; 3525 } 3526 CGF.FinishFunction(); 3527 return TaskPrivatesMap; 3528 } 3529 3530 static int array_pod_sort_comparator(const PrivateDataTy *P1, 3531 const PrivateDataTy *P2) { 3532 return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0); 3533 } 3534 3535 /// Emit initialization for private variables in task-based directives. 
3536 static void emitPrivatesInit(CodeGenFunction &CGF, 3537 const OMPExecutableDirective &D, 3538 Address KmpTaskSharedsPtr, LValue TDBase, 3539 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3540 QualType SharedsTy, QualType SharedsPtrTy, 3541 const OMPTaskDataTy &Data, 3542 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3543 auto &C = CGF.getContext(); 3544 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3545 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3546 LValue SrcBase; 3547 if (!Data.FirstprivateVars.empty()) { 3548 SrcBase = CGF.MakeAddrLValue( 3549 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3550 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3551 SharedsTy); 3552 } 3553 CodeGenFunction::CGCapturedStmtInfo CapturesInfo( 3554 cast<CapturedStmt>(*D.getAssociatedStmt())); 3555 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3556 for (auto &&Pair : Privates) { 3557 auto *VD = Pair.second.PrivateCopy; 3558 auto *Init = VD->getAnyInitializer(); 3559 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3560 !CGF.isTrivialInitializer(Init)))) { 3561 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3562 if (auto *Elem = Pair.second.PrivateElemInit) { 3563 auto *OriginalVD = Pair.second.Original; 3564 auto *SharedField = CapturesInfo.lookup(OriginalVD); 3565 auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3566 SharedRefLValue = CGF.MakeAddrLValue( 3567 Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), 3568 SharedRefLValue.getType(), AlignmentSource::Decl); 3569 QualType Type = OriginalVD->getType(); 3570 if (Type->isArrayType()) { 3571 // Initialize firstprivate array. 3572 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3573 // Perform simple memcpy. 
3574 CGF.EmitAggregateAssign(PrivateLValue.getAddress(), 3575 SharedRefLValue.getAddress(), Type); 3576 } else { 3577 // Initialize firstprivate array using element-by-element 3578 // initialization. 3579 CGF.EmitOMPAggregateAssign( 3580 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, 3581 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3582 Address SrcElement) { 3583 // Clean up any temporaries needed by the initialization. 3584 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3585 InitScope.addPrivate( 3586 Elem, [SrcElement]() -> Address { return SrcElement; }); 3587 (void)InitScope.Privatize(); 3588 // Emit initialization for single element. 3589 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3590 CGF, &CapturesInfo); 3591 CGF.EmitAnyExprToMem(Init, DestElement, 3592 Init->getType().getQualifiers(), 3593 /*IsInitializer=*/false); 3594 }); 3595 } 3596 } else { 3597 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3598 InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { 3599 return SharedRefLValue.getAddress(); 3600 }); 3601 (void)InitScope.Privatize(); 3602 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3603 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3604 /*capturedByInit=*/false); 3605 } 3606 } else 3607 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3608 } 3609 ++FI; 3610 } 3611 } 3612 3613 /// Check if duplication function is required for taskloops. 
3614 static bool checkInitIsRequired(CodeGenFunction &CGF, 3615 ArrayRef<PrivateDataTy> Privates) { 3616 bool InitRequired = false; 3617 for (auto &&Pair : Privates) { 3618 auto *VD = Pair.second.PrivateCopy; 3619 auto *Init = VD->getAnyInitializer(); 3620 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3621 !CGF.isTrivialInitializer(Init)); 3622 } 3623 return InitRequired; 3624 } 3625 3626 3627 /// Emit task_dup function (for initialization of 3628 /// private/firstprivate/lastprivate vars and last_iter flag) 3629 /// \code 3630 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3631 /// lastpriv) { 3632 /// // setup lastprivate flag 3633 /// task_dst->last = lastpriv; 3634 /// // could be constructor calls here... 3635 /// } 3636 /// \endcode 3637 static llvm::Value * 3638 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3639 const OMPExecutableDirective &D, 3640 QualType KmpTaskTWithPrivatesPtrQTy, 3641 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3642 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3643 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3644 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3645 auto &C = CGM.getContext(); 3646 FunctionArgList Args; 3647 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, 3648 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); 3649 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, 3650 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); 3651 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, 3652 /*Id=*/nullptr, C.IntTy); 3653 Args.push_back(&DstArg); 3654 Args.push_back(&SrcArg); 3655 Args.push_back(&LastprivArg); 3656 auto &TaskDupFnInfo = 3657 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3658 auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3659 auto *TaskDup = 3660 llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage, 3661 ".omp_task_dup.", &CGM.getModule()); 3662 
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo); 3663 CodeGenFunction CGF(CGM); 3664 CGF.disableDebugInfo(); 3665 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args); 3666 3667 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3668 CGF.GetAddrOfLocalVar(&DstArg), 3669 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3670 // task_dst->liter = lastpriv; 3671 if (WithLastIter) { 3672 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3673 LValue Base = CGF.EmitLValueForField( 3674 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3675 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3676 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3677 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3678 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3679 } 3680 3681 // Emit initial values for private copies (if any). 3682 assert(!Privates.empty()); 3683 Address KmpTaskSharedsPtr = Address::invalid(); 3684 if (!Data.FirstprivateVars.empty()) { 3685 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3686 CGF.GetAddrOfLocalVar(&SrcArg), 3687 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3688 LValue Base = CGF.EmitLValueForField( 3689 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3690 KmpTaskSharedsPtr = Address( 3691 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 3692 Base, *std::next(KmpTaskTQTyRD->field_begin(), 3693 KmpTaskTShareds)), 3694 Loc), 3695 CGF.getNaturalTypeAlignment(SharedsTy)); 3696 } 3697 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 3698 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 3699 CGF.FinishFunction(); 3700 return TaskDup; 3701 } 3702 3703 /// Checks if destructor function is required to be generated. 3704 /// \return true if cleanups are required, false otherwise. 
3705 static bool 3706 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 3707 bool NeedsCleanup = false; 3708 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3709 auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 3710 for (auto *FD : PrivateRD->fields()) { 3711 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 3712 if (NeedsCleanup) 3713 break; 3714 } 3715 return NeedsCleanup; 3716 } 3717 3718 CGOpenMPRuntime::TaskResultTy 3719 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 3720 const OMPExecutableDirective &D, 3721 llvm::Value *TaskFunction, QualType SharedsTy, 3722 Address Shareds, const OMPTaskDataTy &Data) { 3723 auto &C = CGM.getContext(); 3724 llvm::SmallVector<PrivateDataTy, 4> Privates; 3725 // Aggregate privates and sort them by the alignment. 3726 auto I = Data.PrivateCopies.begin(); 3727 for (auto *E : Data.PrivateVars) { 3728 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3729 Privates.push_back(std::make_pair( 3730 C.getDeclAlign(VD), 3731 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 3732 /*PrivateElemInit=*/nullptr))); 3733 ++I; 3734 } 3735 I = Data.FirstprivateCopies.begin(); 3736 auto IElemInitRef = Data.FirstprivateInits.begin(); 3737 for (auto *E : Data.FirstprivateVars) { 3738 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3739 Privates.push_back(std::make_pair( 3740 C.getDeclAlign(VD), 3741 PrivateHelpersTy( 3742 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 3743 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())))); 3744 ++I; 3745 ++IElemInitRef; 3746 } 3747 I = Data.LastprivateCopies.begin(); 3748 for (auto *E : Data.LastprivateVars) { 3749 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3750 Privates.push_back(std::make_pair( 3751 C.getDeclAlign(VD), 3752 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 3753 /*PrivateElemInit=*/nullptr))); 3754 ++I; 3755 } 
3756 llvm::array_pod_sort(Privates.begin(), Privates.end(), 3757 array_pod_sort_comparator); 3758 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 3759 // Build type kmp_routine_entry_t (if not built yet). 3760 emitKmpRoutineEntryT(KmpInt32Ty); 3761 // Build type kmp_task_t (if not built yet). 3762 if (KmpTaskTQTy.isNull()) { 3763 KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 3764 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 3765 } 3766 auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3767 // Build particular struct kmp_task_t for the given task. 3768 auto *KmpTaskTWithPrivatesQTyRD = 3769 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 3770 auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 3771 QualType KmpTaskTWithPrivatesPtrQTy = 3772 C.getPointerType(KmpTaskTWithPrivatesQTy); 3773 auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 3774 auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo(); 3775 auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 3776 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 3777 3778 // Emit initial values for private copies (if any). 
3779 llvm::Value *TaskPrivatesMap = nullptr; 3780 auto *TaskPrivatesMapTy = 3781 std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType(); 3782 if (!Privates.empty()) { 3783 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3784 TaskPrivatesMap = emitTaskPrivateMappingFunction( 3785 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 3786 FI->getType(), Privates); 3787 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3788 TaskPrivatesMap, TaskPrivatesMapTy); 3789 } else { 3790 TaskPrivatesMap = llvm::ConstantPointerNull::get( 3791 cast<llvm::PointerType>(TaskPrivatesMapTy)); 3792 } 3793 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 3794 // kmp_task_t *tt); 3795 auto *TaskEntry = emitProxyTaskFunction( 3796 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 3797 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 3798 TaskPrivatesMap); 3799 3800 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 3801 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 3802 // kmp_routine_entry_t *task_entry); 3803 // Task flags. Format is taken from 3804 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h, 3805 // description of kmp_tasking_flags struct. 3806 enum { 3807 TiedFlag = 0x1, 3808 FinalFlag = 0x2, 3809 DestructorsFlag = 0x8, 3810 PriorityFlag = 0x20 3811 }; 3812 unsigned Flags = Data.Tied ? TiedFlag : 0; 3813 bool NeedsCleanup = false; 3814 if (!Privates.empty()) { 3815 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 3816 if (NeedsCleanup) 3817 Flags = Flags | DestructorsFlag; 3818 } 3819 if (Data.Priority.getInt()) 3820 Flags = Flags | PriorityFlag; 3821 auto *TaskFlags = 3822 Data.Final.getPointer() 3823 ? 
CGF.Builder.CreateSelect(Data.Final.getPointer(), 3824 CGF.Builder.getInt32(FinalFlag), 3825 CGF.Builder.getInt32(/*C=*/0)) 3826 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 3827 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 3828 auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 3829 llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), 3830 getThreadID(CGF, Loc), TaskFlags, 3831 KmpTaskTWithPrivatesTySize, SharedsSize, 3832 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3833 TaskEntry, KmpRoutineEntryPtrTy)}; 3834 auto *NewTask = CGF.EmitRuntimeCall( 3835 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 3836 auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3837 NewTask, KmpTaskTWithPrivatesPtrTy); 3838 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 3839 KmpTaskTWithPrivatesQTy); 3840 LValue TDBase = 3841 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3842 // Fill the data in the resulting kmp_task_t record. 3843 // Copy shareds if there are any. 3844 Address KmpTaskSharedsPtr = Address::invalid(); 3845 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 3846 KmpTaskSharedsPtr = 3847 Address(CGF.EmitLoadOfScalar( 3848 CGF.EmitLValueForField( 3849 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 3850 KmpTaskTShareds)), 3851 Loc), 3852 CGF.getNaturalTypeAlignment(SharedsTy)); 3853 CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy); 3854 } 3855 // Emit initial values for private copies (if any). 
3856 TaskResultTy Result; 3857 if (!Privates.empty()) { 3858 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 3859 SharedsTy, SharedsPtrTy, Data, Privates, 3860 /*ForDup=*/false); 3861 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 3862 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 3863 Result.TaskDupFn = emitTaskDupFunction( 3864 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 3865 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 3866 /*WithLastIter=*/!Data.LastprivateVars.empty()); 3867 } 3868 } 3869 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 3870 enum { Priority = 0, Destructors = 1 }; 3871 // Provide pointer to function with destructors for privates. 3872 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 3873 auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl(); 3874 if (NeedsCleanup) { 3875 llvm::Value *DestructorFn = emitDestructorsFunction( 3876 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 3877 KmpTaskTWithPrivatesQTy); 3878 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 3879 LValue DestructorsLV = CGF.EmitLValueForField( 3880 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 3881 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3882 DestructorFn, KmpRoutineEntryPtrTy), 3883 DestructorsLV); 3884 } 3885 // Set priority. 
if (Data.Priority.getInt()) {
    // Store the priority value into the Priority member of the union held in
    // the Data2 field of the task descriptor.
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  // Hand the values computed above back to the caller.
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

// Emits the runtime calls for a task: the task is set up by emitTaskInit(),
// an array of kmp_depend_info records is filled in when Data.Dependences is
// non-empty, and the task is then either handed to __kmpc_omp_task /
// __kmpc_omp_task_with_deps or, on the else-branch of an 'if' clause, run
// directly through the task entry between __kmpc_omp_task_begin_if0 and
// __kmpc_omp_task_complete_if0. Does nothing without an insertion point.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Value *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Value *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  auto &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
    // Field indices of the kmp_depend_info record built below.
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Build the kmp_depend_info record type on first use and cache it in
    // KmpDependInfoTy; later calls reuse the cached declaration.
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned i = 0; i < NumDependencies; ++i) {
      const Expr *E = Data.Dependences[i].second;
      auto Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // For an array section the length is the distance between the address
        // emitted for E and one element past the element addressed with
        // /*LowerBound=*/false.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else
        Size = CGF.getTypeSize(Ty);
      auto Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      auto BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      auto LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[i].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      auto FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // Pass the array to the runtime as a void pointer to its first element.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
        CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  auto *ThreadID = getThreadID(CGF, Loc);
  auto *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Only filled in (and only read by ThenCodeGen) when there are dependences.
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Then-branch: hand the task over to the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
                        &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // For a task that is not tied, store 0 into its part id field first.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // Only filled in (and only read by ElseCodeGen) when there are dependences.
  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Else-branch: wait for the dependences (if any) and call the task entry
  // directly.
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF,
                                                           PrePostActionTy &) {
    auto &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy](
        CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  // Without an 'if' clause only the then-branch is emitted.
  if (IfCond)
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

// Emits the __kmpc_taskloop call for a taskloop directive: the task is set up
// by emitTaskInit(), the lower bound, upper bound and stride fields of the
// task descriptor are initialized from the initializers of the directive's
// bound/stride variables, and the runtime call is emitted with the 'if' value,
// nogroup flag, schedule kind/value and optional task duplication function.
// Does nothing without an insertion point.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Value *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // if_val: the evaluated 'if' condition widened to int, or constant 1 when
  // there is no 'if' clause.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);

  // Initialize the lower bound field from the lower bound variable's
  // initializer.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Same for the upper bound field.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Same for the stride field.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Values passed as the 'sched' argument: no schedule expression, or a
  // schedule expression whose flag selects NumTasks over Grainsize.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(),
      UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
      llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn
          ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn,
                                                            CGF.VoidPtrTy)
          : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}

/// \brief Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr Passed through unchanged to \a RedOpGen.
/// \param EExpr Passed through unchanged to \a RedOpGen.
/// \param UpExpr Passed through unchanged to \a RedOpGen.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element reduction.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  auto RHSBegin = RHSAddr.getPointer();
  auto LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely when begin == end.
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carrying the current RHS/LHS element pointers; the second
  // incoming value of each is added after the body has been emitted.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the reduction operation for the current pair of elements: LHSVar and
  // RHSVar are remapped to the current element addresses while RedOpGen runs.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back edge comes from whatever block the builder is in now, which need
  // not be BodyBB if RedOpGen emitted additional blocks.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner. If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // Recognize a call whose callee is an opaque value wrapping a reference to
  // an OMPDeclareReductionDecl; in that case bind the opaque callee to the
  // first function of the pair returned by getUserDefinedReduction() while
  // the call is emitted.
  if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  // Anything else is emitted as an ordinary expression.
  CGF.EmitIgnoredExpr(ReductionOp);
}

// Builds the internal function ".omp.reduction.reduction_func" taking two
// void* arguments. Each argument is cast to ArgsType and treated as an array
// of pointers: the LHS/RHS variables of every reduction are remapped to the
// corresponding array slots and the reduction operations are then emitted.
// Variably modified private types occupy one additional slot that holds the
// array size. Returns the created function.
llvm::Value *CGOpenMPRuntime::emitReductionFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  auto &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *Fn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
      ".omp.reduction.reduction_func", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  // Idx is the slot index in the pointer arrays; it runs ahead of I because
  // variably modified types consume an extra slot (see below).
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&]() -> Address {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&]() -> Address {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem =
          CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy);
      auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      // The size is stored in the slot as a pointer-sized value; convert it
      // back to an integer and bind it to the VLA's opaque size expression.
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (auto *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

// Emits a single reduction operation: element by element through
// EmitOMPAggregateReduction() when PrivateRef has array type, directly through
// emitReductionCombiner() otherwise.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else
    // Emit reduction for array subscript or single variable.
4358 emitReductionCombiner(CGF, ReductionOp); 4359 } 4360 4361 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 4362 ArrayRef<const Expr *> Privates, 4363 ArrayRef<const Expr *> LHSExprs, 4364 ArrayRef<const Expr *> RHSExprs, 4365 ArrayRef<const Expr *> ReductionOps, 4366 ReductionOptionsTy Options) { 4367 if (!CGF.HaveInsertPoint()) 4368 return; 4369 4370 bool WithNowait = Options.WithNowait; 4371 bool SimpleReduction = Options.SimpleReduction; 4372 4373 // Next code should be emitted for reduction: 4374 // 4375 // static kmp_critical_name lock = { 0 }; 4376 // 4377 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 4378 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 4379 // ... 4380 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 4381 // *(Type<n>-1*)rhs[<n>-1]); 4382 // } 4383 // 4384 // ... 4385 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 4386 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 4387 // RedList, reduce_func, &<lock>)) { 4388 // case 1: 4389 // ... 4390 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 4391 // ... 4392 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 4393 // break; 4394 // case 2: 4395 // ... 4396 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 4397 // ... 4398 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 4399 // break; 4400 // default:; 4401 // } 4402 // 4403 // if SimpleReduction is true, only the next code is generated: 4404 // ... 4405 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 4406 // ... 
4407 4408 auto &C = CGM.getContext(); 4409 4410 if (SimpleReduction) { 4411 CodeGenFunction::RunCleanupsScope Scope(CGF); 4412 auto IPriv = Privates.begin(); 4413 auto ILHS = LHSExprs.begin(); 4414 auto IRHS = RHSExprs.begin(); 4415 for (auto *E : ReductionOps) { 4416 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 4417 cast<DeclRefExpr>(*IRHS)); 4418 ++IPriv; 4419 ++ILHS; 4420 ++IRHS; 4421 } 4422 return; 4423 } 4424 4425 // 1. Build a list of reduction variables. 4426 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 4427 auto Size = RHSExprs.size(); 4428 for (auto *E : Privates) { 4429 if (E->getType()->isVariablyModifiedType()) 4430 // Reserve place for array size. 4431 ++Size; 4432 } 4433 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 4434 QualType ReductionArrayTy = 4435 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 4436 /*IndexTypeQuals=*/0); 4437 Address ReductionList = 4438 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 4439 auto IPriv = Privates.begin(); 4440 unsigned Idx = 0; 4441 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 4442 Address Elem = 4443 CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize()); 4444 CGF.Builder.CreateStore( 4445 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4446 CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), 4447 Elem); 4448 if ((*IPriv)->getType()->isVariablyModifiedType()) { 4449 // Store array size. 4450 ++Idx; 4451 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, 4452 CGF.getPointerSize()); 4453 llvm::Value *Size = CGF.Builder.CreateIntCast( 4454 CGF.getVLASize( 4455 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 4456 .first, 4457 CGF.SizeTy, /*isSigned=*/false); 4458 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 4459 Elem); 4460 } 4461 } 4462 4463 // 2. Emit reduce_func(). 
4464 auto *ReductionFn = emitReductionFunction( 4465 CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 4466 LHSExprs, RHSExprs, ReductionOps); 4467 4468 // 3. Create static kmp_critical_name lock = { 0 }; 4469 auto *Lock = getCriticalRegionLock(".reduction"); 4470 4471 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 4472 // RedList, reduce_func, &<lock>); 4473 auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 4474 auto *ThreadId = getThreadID(CGF, Loc); 4475 auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 4476 auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4477 ReductionList.getPointer(), CGF.VoidPtrTy); 4478 llvm::Value *Args[] = { 4479 IdentTLoc, // ident_t *<loc> 4480 ThreadId, // i32 <gtid> 4481 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 4482 ReductionArrayTySize, // size_type sizeof(RedList) 4483 RL, // void *RedList 4484 ReductionFn, // void (*) (void *, void *) <reduce_func> 4485 Lock // kmp_critical_name *&<lock> 4486 }; 4487 auto Res = CGF.EmitRuntimeCall( 4488 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 4489 : OMPRTL__kmpc_reduce), 4490 Args); 4491 4492 // 5. Build switch(res) 4493 auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 4494 auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 4495 4496 // 6. Build case 1: 4497 // ... 4498 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 4499 // ... 
4500 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 4501 // break; 4502 auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 4503 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 4504 CGF.EmitBlock(Case1BB); 4505 4506 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 4507 llvm::Value *EndArgs[] = { 4508 IdentTLoc, // ident_t *<loc> 4509 ThreadId, // i32 <gtid> 4510 Lock // kmp_critical_name *&<lock> 4511 }; 4512 auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps]( 4513 CodeGenFunction &CGF, PrePostActionTy &Action) { 4514 auto &RT = CGF.CGM.getOpenMPRuntime(); 4515 auto IPriv = Privates.begin(); 4516 auto ILHS = LHSExprs.begin(); 4517 auto IRHS = RHSExprs.begin(); 4518 for (auto *E : ReductionOps) { 4519 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 4520 cast<DeclRefExpr>(*IRHS)); 4521 ++IPriv; 4522 ++ILHS; 4523 ++IRHS; 4524 } 4525 }; 4526 RegionCodeGenTy RCG(CodeGen); 4527 CommonActionTy Action( 4528 nullptr, llvm::None, 4529 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait 4530 : OMPRTL__kmpc_end_reduce), 4531 EndArgs); 4532 RCG.setAction(Action); 4533 RCG(CGF); 4534 4535 CGF.EmitBranch(DefaultBB); 4536 4537 // 7. Build case 2: 4538 // ... 4539 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 4540 // ... 
4541 // break; 4542 auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 4543 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 4544 CGF.EmitBlock(Case2BB); 4545 4546 auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps]( 4547 CodeGenFunction &CGF, PrePostActionTy &Action) { 4548 auto ILHS = LHSExprs.begin(); 4549 auto IRHS = RHSExprs.begin(); 4550 auto IPriv = Privates.begin(); 4551 for (auto *E : ReductionOps) { 4552 const Expr *XExpr = nullptr; 4553 const Expr *EExpr = nullptr; 4554 const Expr *UpExpr = nullptr; 4555 BinaryOperatorKind BO = BO_Comma; 4556 if (auto *BO = dyn_cast<BinaryOperator>(E)) { 4557 if (BO->getOpcode() == BO_Assign) { 4558 XExpr = BO->getLHS(); 4559 UpExpr = BO->getRHS(); 4560 } 4561 } 4562 // Try to emit update expression as a simple atomic. 4563 auto *RHSExpr = UpExpr; 4564 if (RHSExpr) { 4565 // Analyze RHS part of the whole expression. 4566 if (auto *ACO = dyn_cast<AbstractConditionalOperator>( 4567 RHSExpr->IgnoreParenImpCasts())) { 4568 // If this is a conditional operator, analyze its condition for 4569 // min/max reduction operator. 
4570 RHSExpr = ACO->getCond(); 4571 } 4572 if (auto *BORHS = 4573 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 4574 EExpr = BORHS->getRHS(); 4575 BO = BORHS->getOpcode(); 4576 } 4577 } 4578 if (XExpr) { 4579 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 4580 auto &&AtomicRedGen = [BO, VD, 4581 Loc](CodeGenFunction &CGF, const Expr *XExpr, 4582 const Expr *EExpr, const Expr *UpExpr) { 4583 LValue X = CGF.EmitLValue(XExpr); 4584 RValue E; 4585 if (EExpr) 4586 E = CGF.EmitAnyExpr(EExpr); 4587 CGF.EmitOMPAtomicSimpleUpdateExpr( 4588 X, E, BO, /*IsXLHSInRHSPart=*/true, 4589 llvm::AtomicOrdering::Monotonic, Loc, 4590 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 4591 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4592 PrivateScope.addPrivate( 4593 VD, [&CGF, VD, XRValue, Loc]() -> Address { 4594 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 4595 CGF.emitOMPSimpleStore( 4596 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 4597 VD->getType().getNonReferenceType(), Loc); 4598 return LHSTemp; 4599 }); 4600 (void)PrivateScope.Privatize(); 4601 return CGF.EmitAnyExpr(UpExpr); 4602 }); 4603 }; 4604 if ((*IPriv)->getType()->isArrayType()) { 4605 // Emit atomic reduction for array section. 4606 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 4607 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 4608 AtomicRedGen, XExpr, EExpr, UpExpr); 4609 } else 4610 // Emit atomic reduction for array subscript or single variable. 4611 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 4612 } else { 4613 // Emit as a critical region. 
4614 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 4615 const Expr *, const Expr *) { 4616 auto &RT = CGF.CGM.getOpenMPRuntime(); 4617 RT.emitCriticalRegion( 4618 CGF, ".atomic_reduction", 4619 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 4620 Action.Enter(CGF); 4621 emitReductionCombiner(CGF, E); 4622 }, 4623 Loc); 4624 }; 4625 if ((*IPriv)->getType()->isArrayType()) { 4626 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 4627 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 4628 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 4629 CritRedGen); 4630 } else 4631 CritRedGen(CGF, nullptr, nullptr, nullptr); 4632 } 4633 ++ILHS; 4634 ++IRHS; 4635 ++IPriv; 4636 } 4637 }; 4638 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 4639 if (!WithNowait) { 4640 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 4641 llvm::Value *EndArgs[] = { 4642 IdentTLoc, // ident_t *<loc> 4643 ThreadId, // i32 <gtid> 4644 Lock // kmp_critical_name *&<lock> 4645 }; 4646 CommonActionTy Action(nullptr, llvm::None, 4647 createRuntimeFunction(OMPRTL__kmpc_end_reduce), 4648 EndArgs); 4649 AtomicRCG.setAction(Action); 4650 AtomicRCG(CGF); 4651 } else 4652 AtomicRCG(CGF); 4653 4654 CGF.EmitBranch(DefaultBB); 4655 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 4656 } 4657 4658 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 4659 SourceLocation Loc) { 4660 if (!CGF.HaveInsertPoint()) 4661 return; 4662 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 4663 // global_tid); 4664 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 4665 // Ignore return result until untied tasks are supported. 
4666 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 4667 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 4668 Region->emitUntiedSwitch(CGF); 4669 } 4670 4671 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 4672 OpenMPDirectiveKind InnerKind, 4673 const RegionCodeGenTy &CodeGen, 4674 bool HasCancel) { 4675 if (!CGF.HaveInsertPoint()) 4676 return; 4677 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 4678 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 4679 } 4680 4681 namespace { 4682 enum RTCancelKind { 4683 CancelNoreq = 0, 4684 CancelParallel = 1, 4685 CancelLoop = 2, 4686 CancelSections = 3, 4687 CancelTaskgroup = 4 4688 }; 4689 } // anonymous namespace 4690 4691 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 4692 RTCancelKind CancelKind = CancelNoreq; 4693 if (CancelRegion == OMPD_parallel) 4694 CancelKind = CancelParallel; 4695 else if (CancelRegion == OMPD_for) 4696 CancelKind = CancelLoop; 4697 else if (CancelRegion == OMPD_sections) 4698 CancelKind = CancelSections; 4699 else { 4700 assert(CancelRegion == OMPD_taskgroup); 4701 CancelKind = CancelTaskgroup; 4702 } 4703 return CancelKind; 4704 } 4705 4706 void CGOpenMPRuntime::emitCancellationPointCall( 4707 CodeGenFunction &CGF, SourceLocation Loc, 4708 OpenMPDirectiveKind CancelRegion) { 4709 if (!CGF.HaveInsertPoint()) 4710 return; 4711 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 4712 // global_tid, kmp_int32 cncl_kind); 4713 if (auto *OMPRegionInfo = 4714 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 4715 // For 'cancellation point taskgroup', the task region info may not have a 4716 // cancel. This may instead happen in another adjacent task. 
4717 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 4718 llvm::Value *Args[] = { 4719 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 4720 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 4721 // Ignore return result until untied tasks are supported. 4722 auto *Result = CGF.EmitRuntimeCall( 4723 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 4724 // if (__kmpc_cancellationpoint()) { 4725 // exit from construct; 4726 // } 4727 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 4728 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 4729 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 4730 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 4731 CGF.EmitBlock(ExitBB); 4732 // exit from construct; 4733 auto CancelDest = 4734 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 4735 CGF.EmitBranchThroughCleanup(CancelDest); 4736 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 4737 } 4738 } 4739 } 4740 4741 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 4742 const Expr *IfCond, 4743 OpenMPDirectiveKind CancelRegion) { 4744 if (!CGF.HaveInsertPoint()) 4745 return; 4746 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 4747 // kmp_int32 cncl_kind); 4748 if (auto *OMPRegionInfo = 4749 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 4750 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 4751 PrePostActionTy &) { 4752 auto &RT = CGF.CGM.getOpenMPRuntime(); 4753 llvm::Value *Args[] = { 4754 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 4755 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 4756 // Ignore return result until untied tasks are supported. 
4757 auto *Result = CGF.EmitRuntimeCall( 4758 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 4759 // if (__kmpc_cancel()) { 4760 // exit from construct; 4761 // } 4762 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 4763 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 4764 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 4765 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 4766 CGF.EmitBlock(ExitBB); 4767 // exit from construct; 4768 auto CancelDest = 4769 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 4770 CGF.EmitBranchThroughCleanup(CancelDest); 4771 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 4772 }; 4773 if (IfCond) 4774 emitOMPIfClause(CGF, IfCond, ThenGen, 4775 [](CodeGenFunction &, PrePostActionTy &) {}); 4776 else { 4777 RegionCodeGenTy ThenRCG(ThenGen); 4778 ThenRCG(CGF); 4779 } 4780 } 4781 } 4782 4783 /// \brief Obtain information that uniquely identifies a target entry. This 4784 /// consists of the file and device IDs as well as line number associated with 4785 /// the relevant entry source location. 
4786 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 4787 unsigned &DeviceID, unsigned &FileID, 4788 unsigned &LineNum) { 4789 4790 auto &SM = C.getSourceManager(); 4791 4792 // The loc should be always valid and have a file ID (the user cannot use 4793 // #pragma directives in macros) 4794 4795 assert(Loc.isValid() && "Source location is expected to be always valid."); 4796 assert(Loc.isFileID() && "Source location is expected to refer to a file."); 4797 4798 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 4799 assert(PLoc.isValid() && "Source location is expected to be always valid."); 4800 4801 llvm::sys::fs::UniqueID ID; 4802 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 4803 llvm_unreachable("Source file with target region no longer exists!"); 4804 4805 DeviceID = ID.getDevice(); 4806 FileID = ID.getFile(); 4807 LineNum = PLoc.getLine(); 4808 } 4809 4810 void CGOpenMPRuntime::emitTargetOutlinedFunction( 4811 const OMPExecutableDirective &D, StringRef ParentName, 4812 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 4813 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 4814 assert(!ParentName.empty() && "Invalid target region parent name!"); 4815 4816 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 4817 IsOffloadEntry, CodeGen); 4818 } 4819 4820 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 4821 const OMPExecutableDirective &D, StringRef ParentName, 4822 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 4823 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 4824 // Create a unique name for the entry function using the source location 4825 // information of the current target region. 
The name will be something like: 4826 // 4827 // __omp_offloading_DD_FFFF_PP_lBB 4828 // 4829 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 4830 // mangled name of the function that encloses the target region and BB is the 4831 // line number of the target region. 4832 4833 unsigned DeviceID; 4834 unsigned FileID; 4835 unsigned Line; 4836 getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID, 4837 Line); 4838 SmallString<64> EntryFnName; 4839 { 4840 llvm::raw_svector_ostream OS(EntryFnName); 4841 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 4842 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 4843 } 4844 4845 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 4846 4847 CodeGenFunction CGF(CGM, true); 4848 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 4849 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 4850 4851 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); 4852 4853 // If this target outline function is not an offload entry, we don't need to 4854 // register it. 4855 if (!IsOffloadEntry) 4856 return; 4857 4858 // The target region ID is used by the runtime library to identify the current 4859 // target region, so it only has to be unique and not necessarily point to 4860 // anything. It could be the pointer to the outlined function that implements 4861 // the target region, but we aren't using that so that the compiler doesn't 4862 // need to keep that, and could therefore inline the host function if proven 4863 // worthwhile during optimization. In the other hand, if emitting code for the 4864 // device, the ID has to be the function address so that it can retrieved from 4865 // the offloading entry and launched by the runtime library. We also mark the 4866 // outlined function to have external linkage in case we are emitting code for 4867 // the device, because these functions will be entry points to the device. 
4868 4869 if (CGM.getLangOpts().OpenMPIsDevice) { 4870 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 4871 OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage); 4872 } else 4873 OutlinedFnID = new llvm::GlobalVariable( 4874 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 4875 llvm::GlobalValue::PrivateLinkage, 4876 llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id"); 4877 4878 // Register the information for the entry associated with this target region. 4879 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 4880 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 4881 /*Flags=*/0); 4882 } 4883 4884 /// discard all CompoundStmts intervening between two constructs 4885 static const Stmt *ignoreCompoundStmts(const Stmt *Body) { 4886 while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body)) 4887 Body = CS->body_front(); 4888 4889 return Body; 4890 } 4891 4892 /// Emit the number of teams for a target directive. Inspect the num_teams 4893 /// clause associated with a teams construct combined or closely nested 4894 /// with the target directive. 4895 /// 4896 /// Emit a team of size one for directives such as 'target parallel' that 4897 /// have no associated teams construct. 4898 /// 4899 /// Otherwise, return nullptr. 4900 static llvm::Value * 4901 emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, 4902 CodeGenFunction &CGF, 4903 const OMPExecutableDirective &D) { 4904 4905 assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " 4906 "teams directive expected to be " 4907 "emitted only for the host!"); 4908 4909 auto &Bld = CGF.Builder; 4910 4911 // If the target directive is combined with a teams directive: 4912 // Return the value in the num_teams clause, if any. 4913 // Otherwise, return 0 to denote the runtime default. 
4914 if (isOpenMPTeamsDirective(D.getDirectiveKind())) { 4915 if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) { 4916 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 4917 auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(), 4918 /*IgnoreResultAssign*/ true); 4919 return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, 4920 /*IsSigned=*/true); 4921 } 4922 4923 // The default value is 0. 4924 return Bld.getInt32(0); 4925 } 4926 4927 // If the target directive is combined with a parallel directive but not a 4928 // teams directive, start one team. 4929 if (isOpenMPParallelDirective(D.getDirectiveKind())) 4930 return Bld.getInt32(1); 4931 4932 // If the current target region has a teams region enclosed, we need to get 4933 // the number of teams to pass to the runtime function call. This is done 4934 // by generating the expression in a inlined region. This is required because 4935 // the expression is captured in the enclosing target environment when the 4936 // teams directive is not combined with target. 4937 4938 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 4939 4940 // FIXME: Accommodate other combined directives with teams when they become 4941 // available. 4942 if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>( 4943 ignoreCompoundStmts(CS.getCapturedStmt()))) { 4944 if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { 4945 CGOpenMPInnerExprInfo CGInfo(CGF, CS); 4946 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 4947 llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); 4948 return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, 4949 /*IsSigned=*/true); 4950 } 4951 4952 // If we have an enclosed teams directive but no num_teams clause we use 4953 // the default value 0. 4954 return Bld.getInt32(0); 4955 } 4956 4957 // No teams associated with the directive. 4958 return nullptr; 4959 } 4960 4961 /// Emit the number of threads for a target directive. 
Inspect the 4962 /// thread_limit clause associated with a teams construct combined or closely 4963 /// nested with the target directive. 4964 /// 4965 /// Emit the num_threads clause for directives such as 'target parallel' that 4966 /// have no associated teams construct. 4967 /// 4968 /// Otherwise, return nullptr. 4969 static llvm::Value * 4970 emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, 4971 CodeGenFunction &CGF, 4972 const OMPExecutableDirective &D) { 4973 4974 assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " 4975 "teams directive expected to be " 4976 "emitted only for the host!"); 4977 4978 auto &Bld = CGF.Builder; 4979 4980 // 4981 // If the target directive is combined with a teams directive: 4982 // Return the value in the thread_limit clause, if any. 4983 // 4984 // If the target directive is combined with a parallel directive: 4985 // Return the value in the num_threads clause, if any. 4986 // 4987 // If both clauses are set, select the minimum of the two. 4988 // 4989 // If neither teams or parallel combined directives set the number of threads 4990 // in a team, return 0 to denote the runtime default. 4991 // 4992 // If this is not a teams directive return nullptr. 
4993 4994 if (isOpenMPTeamsDirective(D.getDirectiveKind()) || 4995 isOpenMPParallelDirective(D.getDirectiveKind())) { 4996 llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0); 4997 llvm::Value *NumThreadsVal = nullptr; 4998 llvm::Value *ThreadLimitVal = nullptr; 4999 5000 if (const auto *ThreadLimitClause = 5001 D.getSingleClause<OMPThreadLimitClause>()) { 5002 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 5003 auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(), 5004 /*IgnoreResultAssign*/ true); 5005 ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, 5006 /*IsSigned=*/true); 5007 } 5008 5009 if (const auto *NumThreadsClause = 5010 D.getSingleClause<OMPNumThreadsClause>()) { 5011 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 5012 llvm::Value *NumThreads = 5013 CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), 5014 /*IgnoreResultAssign*/ true); 5015 NumThreadsVal = 5016 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true); 5017 } 5018 5019 // Select the lesser of thread_limit and num_threads. 5020 if (NumThreadsVal) 5021 ThreadLimitVal = ThreadLimitVal 5022 ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal, 5023 ThreadLimitVal), 5024 NumThreadsVal, ThreadLimitVal) 5025 : NumThreadsVal; 5026 5027 // Set default value passed to the runtime if either teams or a target 5028 // parallel type directive is found but no clause is specified. 5029 if (!ThreadLimitVal) 5030 ThreadLimitVal = DefaultThreadLimitVal; 5031 5032 return ThreadLimitVal; 5033 } 5034 5035 // If the current target region has a teams region enclosed, we need to get 5036 // the thread limit to pass to the runtime function call. This is done 5037 // by generating the expression in a inlined region. This is required because 5038 // the expression is captured in the enclosing target environment when the 5039 // teams directive is not combined with target. 
5040 5041 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 5042 5043 // FIXME: Accommodate other combined directives with teams when they become 5044 // available. 5045 if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>( 5046 ignoreCompoundStmts(CS.getCapturedStmt()))) { 5047 if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { 5048 CGOpenMPInnerExprInfo CGInfo(CGF, CS); 5049 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 5050 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); 5051 return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, 5052 /*IsSigned=*/true); 5053 } 5054 5055 // If we have an enclosed teams directive but no thread_limit clause we use 5056 // the default value 0. 5057 return CGF.Builder.getInt32(0); 5058 } 5059 5060 // No teams associated with the directive. 5061 return nullptr; 5062 } 5063 5064 namespace { 5065 // \brief Utility to handle information from clauses associated with a given 5066 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 5067 // It provides a convenient interface to obtain the information and generate 5068 // code for that information. 5069 class MappableExprsHandler { 5070 public: 5071 /// \brief Values for bit flags used to specify the mapping type for 5072 /// offloading. 5073 enum OpenMPOffloadMappingFlags { 5074 /// \brief Allocate memory on the device and move data from host to device. 5075 OMP_MAP_TO = 0x01, 5076 /// \brief Allocate memory on the device and move data from device to host. 5077 OMP_MAP_FROM = 0x02, 5078 /// \brief Always perform the requested mapping action on the element, even 5079 /// if it was already mapped before. 5080 OMP_MAP_ALWAYS = 0x04, 5081 /// \brief Delete the element from the device environment, ignoring the 5082 /// current reference count associated with the element. 
5083 OMP_MAP_DELETE = 0x08, 5084 /// \brief The element being mapped is a pointer, therefore the pointee 5085 /// should be mapped as well. 5086 OMP_MAP_IS_PTR = 0x10, 5087 /// \brief This flags signals that an argument is the first one relating to 5088 /// a map/private clause expression. For some cases a single 5089 /// map/privatization results in multiple arguments passed to the runtime 5090 /// library. 5091 OMP_MAP_FIRST_REF = 0x20, 5092 /// \brief Signal that the runtime library has to return the device pointer 5093 /// in the current position for the data being mapped. 5094 OMP_MAP_RETURN_PTR = 0x40, 5095 /// \brief This flag signals that the reference being passed is a pointer to 5096 /// private data. 5097 OMP_MAP_PRIVATE_PTR = 0x80, 5098 /// \brief Pass the element to the device by value. 5099 OMP_MAP_PRIVATE_VAL = 0x100, 5100 }; 5101 5102 /// Class that associates information with a base pointer to be passed to the 5103 /// runtime library. 5104 class BasePointerInfo { 5105 /// The base pointer. 5106 llvm::Value *Ptr = nullptr; 5107 /// The base declaration that refers to this device pointer, or null if 5108 /// there is none. 5109 const ValueDecl *DevPtrDecl = nullptr; 5110 5111 public: 5112 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 5113 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 5114 llvm::Value *operator*() const { return Ptr; } 5115 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 5116 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 5117 }; 5118 5119 typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy; 5120 typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy; 5121 typedef SmallVector<unsigned, 16> MapFlagsArrayTy; 5122 5123 private: 5124 /// \brief Directive from where the map clauses were extracted. 5125 const OMPExecutableDirective &CurDir; 5126 5127 /// \brief Function the directive is being generated for. 
5128 CodeGenFunction &CGF; 5129 5130 /// \brief Set of all first private variables in the current directive. 5131 llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls; 5132 5133 /// Map between device pointer declarations and their expression components. 5134 /// The key value for declarations in 'this' is null. 5135 llvm::DenseMap< 5136 const ValueDecl *, 5137 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 5138 DevPointersMap; 5139 5140 llvm::Value *getExprTypeSize(const Expr *E) const { 5141 auto ExprTy = E->getType().getCanonicalType(); 5142 5143 // Reference types are ignored for mapping purposes. 5144 if (auto *RefTy = ExprTy->getAs<ReferenceType>()) 5145 ExprTy = RefTy->getPointeeType().getCanonicalType(); 5146 5147 // Given that an array section is considered a built-in type, we need to 5148 // do the calculation based on the length of the section instead of relying 5149 // on CGF.getTypeSize(E->getType()). 5150 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 5151 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 5152 OAE->getBase()->IgnoreParenImpCasts()) 5153 .getCanonicalType(); 5154 5155 // If there is no length associated with the expression, that means we 5156 // are using the whole length of the base. 5157 if (!OAE->getLength() && OAE->getColonLoc().isValid()) 5158 return CGF.getTypeSize(BaseTy); 5159 5160 llvm::Value *ElemSize; 5161 if (auto *PTy = BaseTy->getAs<PointerType>()) 5162 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 5163 else { 5164 auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 5165 assert(ATy && "Expecting array type if not a pointer type."); 5166 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 5167 } 5168 5169 // If we don't have a length at this point, that is because we have an 5170 // array section with a single element. 
5171 if (!OAE->getLength()) 5172 return ElemSize; 5173 5174 auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); 5175 LengthVal = 5176 CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); 5177 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 5178 } 5179 return CGF.getTypeSize(ExprTy); 5180 } 5181 5182 /// \brief Return the corresponding bits for a given map clause modifier. Add 5183 /// a flag marking the map as a pointer if requested. Add a flag marking the 5184 /// map as the first one of a series of maps that relate to the same map 5185 /// expression. 5186 unsigned getMapTypeBits(OpenMPMapClauseKind MapType, 5187 OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag, 5188 bool AddIsFirstFlag) const { 5189 unsigned Bits = 0u; 5190 switch (MapType) { 5191 case OMPC_MAP_alloc: 5192 case OMPC_MAP_release: 5193 // alloc and release is the default behavior in the runtime library, i.e. 5194 // if we don't pass any bits alloc/release that is what the runtime is 5195 // going to do. Therefore, we don't need to signal anything for these two 5196 // type modifiers. 5197 break; 5198 case OMPC_MAP_to: 5199 Bits = OMP_MAP_TO; 5200 break; 5201 case OMPC_MAP_from: 5202 Bits = OMP_MAP_FROM; 5203 break; 5204 case OMPC_MAP_tofrom: 5205 Bits = OMP_MAP_TO | OMP_MAP_FROM; 5206 break; 5207 case OMPC_MAP_delete: 5208 Bits = OMP_MAP_DELETE; 5209 break; 5210 default: 5211 llvm_unreachable("Unexpected map type!"); 5212 break; 5213 } 5214 if (AddPtrFlag) 5215 Bits |= OMP_MAP_IS_PTR; 5216 if (AddIsFirstFlag) 5217 Bits |= OMP_MAP_FIRST_REF; 5218 if (MapTypeModifier == OMPC_MAP_always) 5219 Bits |= OMP_MAP_ALWAYS; 5220 return Bits; 5221 } 5222 5223 /// \brief Return true if the provided expression is a final array section. A 5224 /// final array section, is one whose length can't be proved to be one. 
5225 bool isFinalArraySectionExpression(const Expr *E) const { 5226 auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 5227 5228 // It is not an array section and therefore not a unity-size one. 5229 if (!OASE) 5230 return false; 5231 5232 // An array section with no colon always refer to a single element. 5233 if (OASE->getColonLoc().isInvalid()) 5234 return false; 5235 5236 auto *Length = OASE->getLength(); 5237 5238 // If we don't have a length we have to check if the array has size 1 5239 // for this dimension. Also, we should always expect a length if the 5240 // base type is pointer. 5241 if (!Length) { 5242 auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 5243 OASE->getBase()->IgnoreParenImpCasts()) 5244 .getCanonicalType(); 5245 if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 5246 return ATy->getSize().getSExtValue() != 1; 5247 // If we don't have a constant dimension length, we have to consider 5248 // the current section as having any size, so it is not necessarily 5249 // unitary. If it happen to be unity size, that's user fault. 5250 return true; 5251 } 5252 5253 // Check if the length evaluates to 1. 5254 llvm::APSInt ConstLength; 5255 if (!Length->EvaluateAsInt(ConstLength, CGF.getContext())) 5256 return true; // Can have more that size 1. 5257 5258 return ConstLength.getSExtValue() != 1; 5259 } 5260 5261 /// \brief Generate the base pointers, section pointers, sizes and map type 5262 /// bits for the provided map type, map modifier, and expression components. 5263 /// \a IsFirstComponent should be set to true if the provided set of 5264 /// components is the first associated with a capture. 
void generateInfoForComponentList(
    OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
    MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
    MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
    bool IsFirstComponentList) const {

  // The following summarizes what has to be generated for each map and the
  // types below. The generated information is expressed in this order:
  // base pointer, section pointer, size, flags
  // (to add to the ones that come from the map type and modifier).
  //
  // double d;
  // int i[100];
  // float *p;
  //
  // struct S1 {
  //   int i;
  //   float f[50];
  // }
  // struct S2 {
  //   int i;
  //   float f[50];
  //   S1 s;
  //   double *p;
  //   struct S2 *ps;
  // }
  // S2 s;
  // S2 *ps;
  //
  // map(d)
  // &d, &d, sizeof(double), noflags
  //
  // map(i)
  // &i, &i, 100*sizeof(int), noflags
  //
  // map(i[1:23])
  // &i(=&i[0]), &i[1], 23*sizeof(int), noflags
  //
  // map(p)
  // &p, &p, sizeof(float*), noflags
  //
  // map(p[1:24])
  // p, &p[1], 24*sizeof(float), noflags
  //
  // map(s)
  // &s, &s, sizeof(S2), noflags
  //
  // map(s.i)
  // &s, &(s.i), sizeof(int), noflags
  //
  // map(s.s.f)
  // &s, &(s.s.f), 50*sizeof(float), noflags
  //
  // map(s.p)
  // &s, &(s.p), sizeof(double*), noflags
  //
  // map(s.p[:22], s.a s.b)
  // &s, &(s.p), sizeof(double*), noflags
  // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag
  //
  // map(s.ps)
  // &s, &(s.ps), sizeof(S2*), noflags
  //
  // map(s.ps->s.i)
  // &s, &(s.ps), sizeof(S2*), noflags
  // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag
  //
  // map(s.ps->ps)
  // &s, &(s.ps), sizeof(S2*), noflags
  // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
  //
  // map(s.ps->ps->ps)
  // &s, &(s.ps), sizeof(S2*), noflags
  // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
  // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
  //
  // map(s.ps->ps->s.f[:22])
  // &s, &(s.ps), sizeof(S2*), noflags
  // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
  // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag
  //
  // map(ps)
  // &ps, &ps, sizeof(S2*), noflags
  //
  // map(ps->i)
  // ps, &(ps->i), sizeof(int), noflags
  //
  // map(ps->s.f)
  // ps, &(ps->s.f[0]), 50*sizeof(float), noflags
  //
  // map(ps->p)
  // ps, &(ps->p), sizeof(double*), noflags
  //
  // map(ps->p[:22])
  // ps, &(ps->p), sizeof(double*), noflags
  // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag
  //
  // map(ps->ps)
  // ps, &(ps->ps), sizeof(S2*), noflags
  //
  // map(ps->ps->s.i)
  // ps, &(ps->ps), sizeof(S2*), noflags
  // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag
  //
  // map(ps->ps->ps)
  // ps, &(ps->ps), sizeof(S2*), noflags
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
  //
  // map(ps->ps->ps->ps)
  // ps, &(ps->ps), sizeof(S2*), noflags
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
  // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
  //
  // map(ps->ps->ps->s.f[:22])
  // ps, &(ps->ps), sizeof(S2*), noflags
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
  // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag +
  // extra_flag

  // Track if the map information being generated is the first for a capture.
  bool IsCaptureFirstInfo = IsFirstComponentList;

  // Scan the components from the base to the complete expression.
  auto CI = Components.rbegin();
  auto CE = Components.rend();
  auto I = CI;

  // Track if the map information being generated is the first for a list of
  // components.
  bool IsExpressionFirstInfo = true;
  llvm::Value *BP = nullptr;

  if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) {
    // The base is the 'this' pointer. The content of the pointer is going
    // to be the base of the field being mapped.
    BP = CGF.EmitScalarExpr(ME->getBase());
  } else {
    // The base is the reference to the variable.
    // BP = &Var.
    BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression()))
             .getPointer();

    // If the variable is a pointer and is being dereferenced (i.e. is not
    // the last component), the base has to be the pointer itself, not its
    // reference. References are ignored for mapping purposes.
    QualType Ty =
        I->getAssociatedDeclaration()->getType().getNonReferenceType();
    if (Ty->isAnyPointerType() && std::next(I) != CE) {
      auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty);
      BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(),
                                       Ty->castAs<PointerType>())
               .getPointer();

      // We do not need to generate individual map information for the
      // pointer, it can be associated with the combined storage.
      ++I;
    }
  }

  for (; I != CE; ++I) {
    auto Next = std::next(I);

    // We need to generate the addresses and sizes if this is the last
    // component, if the component is a pointer or if it is an array section
    // whose length can't be proved to be one. If this is a pointer, it
    // becomes the base address for the following components.

    // A final array section, is one whose length can't be proved to be one.
    bool IsFinalArraySection =
        isFinalArraySectionExpression(I->getAssociatedExpression());

    // Get information on whether the element is a pointer. Have to do a
    // special treatment for array sections given that they are built-in
    // types.
    const auto *OASE =
        dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
    bool IsPointer =
        (OASE &&
         OMPArraySectionExpr::getBaseOriginalType(OASE)
             .getCanonicalType()
             ->isAnyPointerType()) ||
        I->getAssociatedExpression()->getType()->isAnyPointerType();

    if (Next == CE || IsPointer || IsFinalArraySection) {

      // If this is not the last component, we expect the pointer to be
      // associated with an array expression or member expression.
      assert((Next == CE ||
              isa<MemberExpr>(Next->getAssociatedExpression()) ||
              isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
              isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
             "Unexpected expression");

      auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer();
      auto *Size = getExprTypeSize(I->getAssociatedExpression());

      // If we have a member expression and the current component is a
      // reference, we have to map the reference too. Whenever we have a
      // reference, the section that reference refers to is going to be a
      // load instruction from the storage assigned to the reference.
      if (isa<MemberExpr>(I->getAssociatedExpression()) &&
          I->getAssociatedDeclaration()->getType()->isReferenceType()) {
        auto *LI = cast<llvm::LoadInst>(LB);
        auto *RefAddr = LI->getPointerOperand();

        // Extra entry for the reference storage itself: 'alloc' map type,
        // pointer-sized.
        BasePointers.push_back(BP);
        Pointers.push_back(RefAddr);
        Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
        Types.push_back(getMapTypeBits(
            /*MapType*/ OMPC_MAP_alloc, /*MapTypeModifier=*/OMPC_MAP_unknown,
            !IsExpressionFirstInfo, IsCaptureFirstInfo));
        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        // The reference will be the next base address.
        BP = RefAddr;
      }

      BasePointers.push_back(BP);
      Pointers.push_back(LB);
      Sizes.push_back(Size);

      // We need to add a pointer flag for each map that comes from the
      // same expression except for the first one. We also need to signal
      // this map is the first one that relates with the current capture
      // (there is a set of entries for each capture).
      Types.push_back(getMapTypeBits(MapType, MapTypeModifier,
                                     !IsExpressionFirstInfo,
                                     IsCaptureFirstInfo));

      // If we have a final array section, we are done with this expression.
      if (IsFinalArraySection)
        break;

      // The pointer becomes the base for the next element.
      if (Next != CE)
        BP = LB;

      IsExpressionFirstInfo = false;
      IsCaptureFirstInfo = false;
      continue;
    }
  }
}

/// \brief Return the adjusted map modifiers if the declaration a capture
/// refers to appears in a first-private clause. This is expected to be used
/// only with directives that start with 'target'.
unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap,
                                             unsigned CurrentModifiers) {
  assert(Cap.capturesVariable() && "Expected capture by reference only!");

  // A first private variable captured by reference will use only the
  // 'private ptr' and 'map to' flag. Return the right flags if the captured
  // declaration is known as first-private in this handler.
  if (FirstPrivateDecls.count(Cap.getCapturedVar()))
    return MappableExprsHandler::OMP_MAP_PRIVATE_PTR |
           MappableExprsHandler::OMP_MAP_TO;

  // We didn't modify anything.
  return CurrentModifiers;
}

public:
/// Build a handler for directive \p Dir, emitting code through \p CGF. The
/// canonical declarations named in the directive's firstprivate clauses and
/// the component lists of its is_device_ptr clauses are recorded up front.
MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
    : CurDir(Dir), CGF(CGF) {
  // Extract firstprivate clause information.
  for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
    for (const auto *D : C->varlists())
      FirstPrivateDecls.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  // Extract device pointer clause information.
  for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
    for (auto L : C->component_lists())
      DevPointersMap[L.first].push_back(L.second);
}

/// \brief Generate all the base pointers, section pointers, sizes and map
/// types for the extracted mappable expressions. Also, for each item that
/// relates with a device pointer, a pair of the relevant declaration and
/// index where it occurs is appended to the device pointers info array.
void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                     MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                     MapFlagsArrayTy &Types) const {
  BasePointers.clear();
  Pointers.clear();
  Sizes.clear();
  Types.clear();

  struct MapInfo {
    /// Kind that defines how a device pointer has to be returned.
    enum ReturnPointerKind {
      // Don't have to return any pointer.
      RPK_None,
      // Pointer is the base of the declaration.
5560 RPK_Base, 5561 // Pointer is a member of the base declaration - 'this' 5562 RPK_Member, 5563 // Pointer is a reference and a member of the base declaration - 'this' 5564 RPK_MemberReference, 5565 }; 5566 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 5567 OpenMPMapClauseKind MapType; 5568 OpenMPMapClauseKind MapTypeModifier; 5569 ReturnPointerKind ReturnDevicePointer; 5570 5571 MapInfo() 5572 : MapType(OMPC_MAP_unknown), MapTypeModifier(OMPC_MAP_unknown), 5573 ReturnDevicePointer(RPK_None) {} 5574 MapInfo( 5575 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 5576 OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, 5577 ReturnPointerKind ReturnDevicePointer) 5578 : Components(Components), MapType(MapType), 5579 MapTypeModifier(MapTypeModifier), 5580 ReturnDevicePointer(ReturnDevicePointer) {} 5581 }; 5582 5583 // We have to process the component lists that relate with the same 5584 // declaration in a single chunk so that we can generate the map flags 5585 // correctly. Therefore, we organize all lists in a map. 5586 llvm::DenseMap<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 5587 5588 // Helper function to fill the information map for the different supported 5589 // clauses. 5590 auto &&InfoGen = [&Info]( 5591 const ValueDecl *D, 5592 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 5593 OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier, 5594 MapInfo::ReturnPointerKind ReturnDevicePointer) { 5595 const ValueDecl *VD = 5596 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 5597 Info[VD].push_back({L, MapType, MapModifier, ReturnDevicePointer}); 5598 }; 5599 5600 // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 
5601 for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) 5602 for (auto L : C->component_lists()) 5603 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(), 5604 MapInfo::RPK_None); 5605 for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>()) 5606 for (auto L : C->component_lists()) 5607 InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown, 5608 MapInfo::RPK_None); 5609 for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>()) 5610 for (auto L : C->component_lists()) 5611 InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown, 5612 MapInfo::RPK_None); 5613 5614 // Look at the use_device_ptr clause information and mark the existing map 5615 // entries as such. If there is no map information for an entry in the 5616 // use_device_ptr list, we create one with map type 'alloc' and zero size 5617 // section. It is the user fault if that was not mapped before. 5618 // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 5619 for (auto *C : this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) 5620 for (auto L : C->component_lists()) { 5621 assert(!L.second.empty() && "Not expecting empty list of components!"); 5622 const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); 5623 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 5624 auto *IE = L.second.back().getAssociatedExpression(); 5625 // If the first component is a member expression, we have to look into 5626 // 'this', which maps to null in the map of map information. Otherwise 5627 // look directly for the information. 5628 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 5629 5630 // We potentially have map information for this declaration already. 5631 // Look for the first set of components that refer to it. 
5632 if (It != Info.end()) { 5633 auto CI = std::find_if( 5634 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { 5635 return MI.Components.back().getAssociatedDeclaration() == VD; 5636 }); 5637 // If we found a map entry, signal that the pointer has to be returned 5638 // and move on to the next declaration. 5639 if (CI != It->second.end()) { 5640 CI->ReturnDevicePointer = isa<MemberExpr>(IE) 5641 ? (VD->getType()->isReferenceType() 5642 ? MapInfo::RPK_MemberReference 5643 : MapInfo::RPK_Member) 5644 : MapInfo::RPK_Base; 5645 continue; 5646 } 5647 } 5648 5649 // We didn't find any match in our map information - generate a zero 5650 // size array section. 5651 // FIXME: MSVC 2013 seems to require this-> to find member CGF. 5652 llvm::Value *Ptr = 5653 this->CGF 5654 .EmitLoadOfLValue(this->CGF.EmitLValue(IE), SourceLocation()) 5655 .getScalarVal(); 5656 BasePointers.push_back({Ptr, VD}); 5657 Pointers.push_back(Ptr); 5658 Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy)); 5659 Types.push_back(OMP_MAP_RETURN_PTR | OMP_MAP_FIRST_REF); 5660 } 5661 5662 for (auto &M : Info) { 5663 // We need to know when we generate information for the first component 5664 // associated with a capture, because the mapping flags depend on it. 5665 bool IsFirstComponentList = true; 5666 for (MapInfo &L : M.second) { 5667 assert(!L.Components.empty() && 5668 "Not expecting declaration with no component lists."); 5669 5670 // Remember the current base pointer index. 5671 unsigned CurrentBasePointersIdx = BasePointers.size(); 5672 // FIXME: MSVC 2013 seems to require this-> to find the member method. 5673 this->generateInfoForComponentList(L.MapType, L.MapTypeModifier, 5674 L.Components, BasePointers, Pointers, 5675 Sizes, Types, IsFirstComponentList); 5676 5677 // If this entry relates with a device pointer, set the relevant 5678 // declaration and add the 'return pointer' flag. 
5679 if (IsFirstComponentList && 5680 L.ReturnDevicePointer != MapInfo::RPK_None) { 5681 // If the pointer is not the base of the map, we need to skip the 5682 // base. If it is a reference in a member field, we also need to skip 5683 // the map of the reference. 5684 if (L.ReturnDevicePointer != MapInfo::RPK_Base) { 5685 ++CurrentBasePointersIdx; 5686 if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference) 5687 ++CurrentBasePointersIdx; 5688 } 5689 assert(BasePointers.size() > CurrentBasePointersIdx && 5690 "Unexpected number of mapped base pointers."); 5691 5692 auto *RelevantVD = L.Components.back().getAssociatedDeclaration(); 5693 assert(RelevantVD && 5694 "No relevant declaration related with device pointer??"); 5695 5696 BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); 5697 Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PTR; 5698 } 5699 IsFirstComponentList = false; 5700 } 5701 } 5702 } 5703 5704 /// \brief Generate the base pointers, section pointers, sizes and map types 5705 /// associated to a given capture. 5706 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 5707 llvm::Value *Arg, 5708 MapBaseValuesArrayTy &BasePointers, 5709 MapValuesArrayTy &Pointers, 5710 MapValuesArrayTy &Sizes, 5711 MapFlagsArrayTy &Types) const { 5712 assert(!Cap->capturesVariableArrayType() && 5713 "Not expecting to generate map info for a variable array type!"); 5714 5715 BasePointers.clear(); 5716 Pointers.clear(); 5717 Sizes.clear(); 5718 Types.clear(); 5719 5720 // We need to know when we generating information for the first component 5721 // associated with a capture, because the mapping flags depend on it. 5722 bool IsFirstComponentList = true; 5723 5724 const ValueDecl *VD = 5725 Cap->capturesThis() 5726 ? nullptr 5727 : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl()); 5728 5729 // If this declaration appears in a is_device_ptr clause we just have to 5730 // pass the pointer by value. 
If it is a reference to a declaration, we just 5731 // pass its value, otherwise, if it is a member expression, we need to map 5732 // 'to' the field. 5733 if (!VD) { 5734 auto It = DevPointersMap.find(VD); 5735 if (It != DevPointersMap.end()) { 5736 for (auto L : It->second) { 5737 generateInfoForComponentList( 5738 /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L, 5739 BasePointers, Pointers, Sizes, Types, IsFirstComponentList); 5740 IsFirstComponentList = false; 5741 } 5742 return; 5743 } 5744 } else if (DevPointersMap.count(VD)) { 5745 BasePointers.push_back({Arg, VD}); 5746 Pointers.push_back(Arg); 5747 Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); 5748 Types.push_back(OMP_MAP_PRIVATE_VAL | OMP_MAP_FIRST_REF); 5749 return; 5750 } 5751 5752 // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 5753 for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) 5754 for (auto L : C->decl_component_lists(VD)) { 5755 assert(L.first == VD && 5756 "We got information for the wrong declaration??"); 5757 assert(!L.second.empty() && 5758 "Not expecting declaration with no component lists."); 5759 generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(), 5760 L.second, BasePointers, Pointers, Sizes, 5761 Types, IsFirstComponentList); 5762 IsFirstComponentList = false; 5763 } 5764 5765 return; 5766 } 5767 5768 /// \brief Generate the default map information for a given capture \a CI, 5769 /// record field declaration \a RI and captured value \a CV. 5770 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 5771 const FieldDecl &RI, llvm::Value *CV, 5772 MapBaseValuesArrayTy &CurBasePointers, 5773 MapValuesArrayTy &CurPointers, 5774 MapValuesArrayTy &CurSizes, 5775 MapFlagsArrayTy &CurMapTypes) { 5776 5777 // Do the default mapping. 
5778 if (CI.capturesThis()) { 5779 CurBasePointers.push_back(CV); 5780 CurPointers.push_back(CV); 5781 const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 5782 CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType())); 5783 // Default map type. 5784 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 5785 } else if (CI.capturesVariableByCopy()) { 5786 CurBasePointers.push_back(CV); 5787 CurPointers.push_back(CV); 5788 if (!RI.getType()->isAnyPointerType()) { 5789 // We have to signal to the runtime captures passed by value that are 5790 // not pointers. 5791 CurMapTypes.push_back(OMP_MAP_PRIVATE_VAL); 5792 CurSizes.push_back(CGF.getTypeSize(RI.getType())); 5793 } else { 5794 // Pointers are implicitly mapped with a zero size and no flags 5795 // (other than first map that is added for all implicit maps). 5796 CurMapTypes.push_back(0u); 5797 CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy)); 5798 } 5799 } else { 5800 assert(CI.capturesVariable() && "Expected captured reference."); 5801 CurBasePointers.push_back(CV); 5802 CurPointers.push_back(CV); 5803 5804 const ReferenceType *PtrTy = 5805 cast<ReferenceType>(RI.getType().getTypePtr()); 5806 QualType ElementType = PtrTy->getPointeeType(); 5807 CurSizes.push_back(CGF.getTypeSize(ElementType)); 5808 // The default map type for a scalar/complex type is 'to' because by 5809 // default the value doesn't have to be retrieved. For an aggregate 5810 // type, the default is 'tofrom'. 5811 CurMapTypes.push_back(ElementType->isAggregateType() 5812 ? (OMP_MAP_TO | OMP_MAP_FROM) 5813 : OMP_MAP_TO); 5814 5815 // If we have a capture by reference we may need to add the private 5816 // pointer flag if the base declaration shows in some first-private 5817 // clause. 5818 CurMapTypes.back() = 5819 adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back()); 5820 } 5821 // Every default map produces a single argument, so, it is always the 5822 // first one. 
5823 CurMapTypes.back() |= OMP_MAP_FIRST_REF; 5824 } 5825 }; 5826 5827 enum OpenMPOffloadingReservedDeviceIDs { 5828 /// \brief Device ID if the device was not defined, runtime should get it 5829 /// from environment variables in the spec. 5830 OMP_DEVICEID_UNDEF = -1, 5831 }; 5832 } // anonymous namespace 5833 5834 /// \brief Emit the arrays used to pass the captures and map information to the 5835 /// offloading runtime library. If there is no map or capture information, 5836 /// return nullptr by reference. 5837 static void 5838 emitOffloadingArrays(CodeGenFunction &CGF, 5839 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 5840 MappableExprsHandler::MapValuesArrayTy &Pointers, 5841 MappableExprsHandler::MapValuesArrayTy &Sizes, 5842 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 5843 CGOpenMPRuntime::TargetDataInfo &Info) { 5844 auto &CGM = CGF.CGM; 5845 auto &Ctx = CGF.getContext(); 5846 5847 // Reset the array information. 5848 Info.clearArrayInfo(); 5849 Info.NumberOfPtrs = BasePointers.size(); 5850 5851 if (Info.NumberOfPtrs) { 5852 // Detect if we have any capture size requiring runtime evaluation of the 5853 // size so that a constant array could be eventually used. 
5854 bool hasRuntimeEvaluationCaptureSize = false; 5855 for (auto *S : Sizes) 5856 if (!isa<llvm::Constant>(S)) { 5857 hasRuntimeEvaluationCaptureSize = true; 5858 break; 5859 } 5860 5861 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 5862 QualType PointerArrayType = 5863 Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, 5864 /*IndexTypeQuals=*/0); 5865 5866 Info.BasePointersArray = 5867 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 5868 Info.PointersArray = 5869 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 5870 5871 // If we don't have any VLA types or other types that require runtime 5872 // evaluation, we can use a constant array for the map sizes, otherwise we 5873 // need to fill up the arrays as we do for the pointers. 5874 if (hasRuntimeEvaluationCaptureSize) { 5875 QualType SizeArrayType = Ctx.getConstantArrayType( 5876 Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, 5877 /*IndexTypeQuals=*/0); 5878 Info.SizesArray = 5879 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 5880 } else { 5881 // We expect all the sizes to be constant, so we collect them to create 5882 // a constant array. 5883 SmallVector<llvm::Constant *, 16> ConstSizes; 5884 for (auto S : Sizes) 5885 ConstSizes.push_back(cast<llvm::Constant>(S)); 5886 5887 auto *SizesArrayInit = llvm::ConstantArray::get( 5888 llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); 5889 auto *SizesArrayGbl = new llvm::GlobalVariable( 5890 CGM.getModule(), SizesArrayInit->getType(), 5891 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 5892 SizesArrayInit, ".offload_sizes"); 5893 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 5894 Info.SizesArray = SizesArrayGbl; 5895 } 5896 5897 // The map types are always constant so we don't need to generate code to 5898 // fill arrays. Instead, we create an array constant. 
5899 llvm::Constant *MapTypesArrayInit = 5900 llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); 5901 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 5902 CGM.getModule(), MapTypesArrayInit->getType(), 5903 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 5904 MapTypesArrayInit, ".offload_maptypes"); 5905 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 5906 Info.MapTypesArray = MapTypesArrayGbl; 5907 5908 for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) { 5909 llvm::Value *BPVal = *BasePointers[i]; 5910 if (BPVal->getType()->isPointerTy()) 5911 BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy); 5912 else { 5913 assert(BPVal->getType()->isIntegerTy() && 5914 "If not a pointer, the value type must be an integer."); 5915 BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy); 5916 } 5917 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 5918 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 5919 Info.BasePointersArray, 0, i); 5920 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 5921 CGF.Builder.CreateStore(BPVal, BPAddr); 5922 5923 if (Info.requiresDevicePointerInfo()) 5924 if (auto *DevVD = BasePointers[i].getDevicePtrDecl()) 5925 Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr)); 5926 5927 llvm::Value *PVal = Pointers[i]; 5928 if (PVal->getType()->isPointerTy()) 5929 PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy); 5930 else { 5931 assert(PVal->getType()->isIntegerTy() && 5932 "If not a pointer, the value type must be an integer."); 5933 PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy); 5934 } 5935 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 5936 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 5937 Info.PointersArray, 0, i); 5938 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 5939 CGF.Builder.CreateStore(PVal, PAddr); 5940 5941 if (hasRuntimeEvaluationCaptureSize) { 5942 llvm::Value *S = 
CGF.Builder.CreateConstInBoundsGEP2_32( 5943 llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), 5944 Info.SizesArray, 5945 /*Idx0=*/0, 5946 /*Idx1=*/i); 5947 Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); 5948 CGF.Builder.CreateStore( 5949 CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true), 5950 SAddr); 5951 } 5952 } 5953 } 5954 } 5955 /// \brief Emit the arguments to be passed to the runtime library based on the 5956 /// arrays of pointers, sizes and map types. 5957 static void emitOffloadingArraysArgument( 5958 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 5959 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 5960 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 5961 auto &CGM = CGF.CGM; 5962 if (Info.NumberOfPtrs) { 5963 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5964 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 5965 Info.BasePointersArray, 5966 /*Idx0=*/0, /*Idx1=*/0); 5967 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5968 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 5969 Info.PointersArray, 5970 /*Idx0=*/0, 5971 /*Idx1=*/0); 5972 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5973 llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray, 5974 /*Idx0=*/0, /*Idx1=*/0); 5975 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5976 llvm::ArrayType::get(CGM.Int32Ty, Info.NumberOfPtrs), 5977 Info.MapTypesArray, 5978 /*Idx0=*/0, 5979 /*Idx1=*/0); 5980 } else { 5981 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 5982 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 5983 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); 5984 MapTypesArrayArg = 5985 llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()); 5986 } 5987 } 5988 5989 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, 5990 const OMPExecutableDirective &D, 5991 
llvm::Value *OutlinedFn, 5992 llvm::Value *OutlinedFnID, 5993 const Expr *IfCond, const Expr *Device, 5994 ArrayRef<llvm::Value *> CapturedVars) { 5995 if (!CGF.HaveInsertPoint()) 5996 return; 5997 5998 assert(OutlinedFn && "Invalid outlined function!"); 5999 6000 auto &Ctx = CGF.getContext(); 6001 6002 // Fill up the arrays with all the captured variables. 6003 MappableExprsHandler::MapValuesArrayTy KernelArgs; 6004 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 6005 MappableExprsHandler::MapValuesArrayTy Pointers; 6006 MappableExprsHandler::MapValuesArrayTy Sizes; 6007 MappableExprsHandler::MapFlagsArrayTy MapTypes; 6008 6009 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 6010 MappableExprsHandler::MapValuesArrayTy CurPointers; 6011 MappableExprsHandler::MapValuesArrayTy CurSizes; 6012 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 6013 6014 // Get mappable expression information. 6015 MappableExprsHandler MEHandler(D, CGF); 6016 6017 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 6018 auto RI = CS.getCapturedRecordDecl()->field_begin(); 6019 auto CV = CapturedVars.begin(); 6020 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 6021 CE = CS.capture_end(); 6022 CI != CE; ++CI, ++RI, ++CV) { 6023 StringRef Name; 6024 QualType Ty; 6025 6026 CurBasePointers.clear(); 6027 CurPointers.clear(); 6028 CurSizes.clear(); 6029 CurMapTypes.clear(); 6030 6031 // VLA sizes are passed to the outlined region by copy and do not have map 6032 // information associated. 6033 if (CI->capturesVariableArrayType()) { 6034 CurBasePointers.push_back(*CV); 6035 CurPointers.push_back(*CV); 6036 CurSizes.push_back(CGF.getTypeSize(RI->getType())); 6037 // Copy to the device as an argument. No need to retrieve it. 
6038 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL | 6039 MappableExprsHandler::OMP_MAP_FIRST_REF); 6040 } else { 6041 // If we have any information in the map clause, we use it, otherwise we 6042 // just do a default mapping. 6043 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, 6044 CurSizes, CurMapTypes); 6045 if (CurBasePointers.empty()) 6046 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, 6047 CurPointers, CurSizes, CurMapTypes); 6048 } 6049 // We expect to have at least an element of information for this capture. 6050 assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!"); 6051 assert(CurBasePointers.size() == CurPointers.size() && 6052 CurBasePointers.size() == CurSizes.size() && 6053 CurBasePointers.size() == CurMapTypes.size() && 6054 "Inconsistent map information sizes!"); 6055 6056 // The kernel args are always the first elements of the base pointers 6057 // associated with a capture. 6058 KernelArgs.push_back(*CurBasePointers.front()); 6059 // We need to append the results of this capture to what we already have. 6060 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 6061 Pointers.append(CurPointers.begin(), CurPointers.end()); 6062 Sizes.append(CurSizes.begin(), CurSizes.end()); 6063 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); 6064 } 6065 6066 // Keep track on whether the host function has to be executed. 6067 auto OffloadErrorQType = 6068 Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true); 6069 auto OffloadError = CGF.MakeAddrLValue( 6070 CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"), 6071 OffloadErrorQType); 6072 CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), 6073 OffloadError); 6074 6075 // Fill up the pointer arrays and transfer execution to the device. 
6076 auto &&ThenGen = [&BasePointers, &Pointers, &Sizes, &MapTypes, Device, 6077 OutlinedFnID, OffloadError, 6078 &D](CodeGenFunction &CGF, PrePostActionTy &) { 6079 auto &RT = CGF.CGM.getOpenMPRuntime(); 6080 // Emit the offloading arrays. 6081 TargetDataInfo Info; 6082 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 6083 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 6084 Info.PointersArray, Info.SizesArray, 6085 Info.MapTypesArray, Info); 6086 6087 // On top of the arrays that were filled up, the target offloading call 6088 // takes as arguments the device id as well as the host pointer. The host 6089 // pointer is used by the runtime library to identify the current target 6090 // region, so it only has to be unique and not necessarily point to 6091 // anything. It could be the pointer to the outlined function that 6092 // implements the target region, but we aren't using that so that the 6093 // compiler doesn't need to keep that, and could therefore inline the host 6094 // function if proven worthwhile during optimization. 6095 6096 // From this point on, we need to have an ID of the target region defined. 6097 assert(OutlinedFnID && "Invalid outlined function ID!"); 6098 6099 // Emit device ID if any. 6100 llvm::Value *DeviceID; 6101 if (Device) 6102 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 6103 CGF.Int32Ty, /*isSigned=*/true); 6104 else 6105 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 6106 6107 // Emit the number of elements in the offloading arrays. 6108 llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size()); 6109 6110 // Return value of the runtime offloading call. 6111 llvm::Value *Return; 6112 6113 auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D); 6114 auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D); 6115 6116 // The target region is an outlined function launched by the runtime 6117 // via calls __tgt_target() or __tgt_target_teams(). 
6118 // 6119 // __tgt_target() launches a target region with one team and one thread, 6120 // executing a serial region. This master thread may in turn launch 6121 // more threads within its team upon encountering a parallel region, 6122 // however, no additional teams can be launched on the device. 6123 // 6124 // __tgt_target_teams() launches a target region with one or more teams, 6125 // each with one or more threads. This call is required for target 6126 // constructs such as: 6127 // 'target teams' 6128 // 'target' / 'teams' 6129 // 'target teams distribute parallel for' 6130 // 'target parallel' 6131 // and so on. 6132 // 6133 // Note that on the host and CPU targets, the runtime implementation of 6134 // these calls simply call the outlined function without forking threads. 6135 // The outlined functions themselves have runtime calls to 6136 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 6137 // the compiler in emitTeamsCall() and emitParallelCall(). 6138 // 6139 // In contrast, on the NVPTX target, the implementation of 6140 // __tgt_target_teams() launches a GPU kernel with the requested number 6141 // of teams and threads so no additional calls to the runtime are required. 6142 if (NumTeams) { 6143 // If we have NumTeams defined this means that we have an enclosed teams 6144 // region. Therefore we also expect to have NumThreads defined. These two 6145 // values should be defined in the presence of a teams directive, 6146 // regardless of having any clauses associated. If the user is using teams 6147 // but no clauses, these two values will be the default that should be 6148 // passed to the runtime library - a 32-bit integer with the value zero. 
6149 assert(NumThreads && "Thread limit expression should be available along " 6150 "with number of teams."); 6151 llvm::Value *OffloadingArgs[] = { 6152 DeviceID, OutlinedFnID, 6153 PointerNum, Info.BasePointersArray, 6154 Info.PointersArray, Info.SizesArray, 6155 Info.MapTypesArray, NumTeams, 6156 NumThreads}; 6157 Return = CGF.EmitRuntimeCall( 6158 RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); 6159 } else { 6160 llvm::Value *OffloadingArgs[] = { 6161 DeviceID, OutlinedFnID, 6162 PointerNum, Info.BasePointersArray, 6163 Info.PointersArray, Info.SizesArray, 6164 Info.MapTypesArray}; 6165 Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target), 6166 OffloadingArgs); 6167 } 6168 6169 CGF.EmitStoreOfScalar(Return, OffloadError); 6170 }; 6171 6172 // Notify that the host version must be executed. 6173 auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) { 6174 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u), 6175 OffloadError); 6176 }; 6177 6178 // If we have a target function ID it means that we need to support 6179 // offloading, otherwise, just execute on the host. We need to execute on host 6180 // regardless of the conditional in the if clause if, e.g., the user do not 6181 // specify target triples. 6182 if (OutlinedFnID) { 6183 if (IfCond) 6184 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 6185 else { 6186 RegionCodeGenTy ThenRCG(ThenGen); 6187 ThenRCG(CGF); 6188 } 6189 } else { 6190 RegionCodeGenTy ElseRCG(ElseGen); 6191 ElseRCG(CGF); 6192 } 6193 6194 // Check the error code and execute the host version if required. 
6195 auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed"); 6196 auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont"); 6197 auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation()); 6198 auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal); 6199 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 6200 6201 CGF.EmitBlock(OffloadFailedBlock); 6202 CGF.Builder.CreateCall(OutlinedFn, KernelArgs); 6203 CGF.EmitBranch(OffloadContBlock); 6204 6205 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 6206 } 6207 6208 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 6209 StringRef ParentName) { 6210 if (!S) 6211 return; 6212 6213 // Codegen OMP target directives that offload compute to the device. 6214 bool requiresDeviceCodegen = 6215 isa<OMPExecutableDirective>(S) && 6216 isOpenMPTargetExecutionDirective( 6217 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 6218 6219 if (requiresDeviceCodegen) { 6220 auto &E = *cast<OMPExecutableDirective>(S); 6221 unsigned DeviceID; 6222 unsigned FileID; 6223 unsigned Line; 6224 getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID, 6225 FileID, Line); 6226 6227 // Is this a target region that should not be emitted as an entry point? If 6228 // so just signal we are done with this target region. 
6229 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 6230 ParentName, Line)) 6231 return; 6232 6233 switch (S->getStmtClass()) { 6234 case Stmt::OMPTargetDirectiveClass: 6235 CodeGenFunction::EmitOMPTargetDeviceFunction( 6236 CGM, ParentName, cast<OMPTargetDirective>(*S)); 6237 break; 6238 case Stmt::OMPTargetParallelDirectiveClass: 6239 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 6240 CGM, ParentName, cast<OMPTargetParallelDirective>(*S)); 6241 break; 6242 case Stmt::OMPTargetTeamsDirectiveClass: 6243 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 6244 CGM, ParentName, cast<OMPTargetTeamsDirective>(*S)); 6245 break; 6246 default: 6247 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 6248 } 6249 return; 6250 } 6251 6252 if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) { 6253 if (!E->hasAssociatedStmt()) 6254 return; 6255 6256 scanForTargetRegionsFunctions( 6257 cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(), 6258 ParentName); 6259 return; 6260 } 6261 6262 // If this is a lambda function, look into its body. 6263 if (auto *L = dyn_cast<LambdaExpr>(S)) 6264 S = L->getBody(); 6265 6266 // Keep looking for target regions recursively. 6267 for (auto *II : S->children()) 6268 scanForTargetRegionsFunctions(II, ParentName); 6269 } 6270 6271 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 6272 auto &FD = *cast<FunctionDecl>(GD.getDecl()); 6273 6274 // If emitting code for the host, we do not process FD here. Instead we do 6275 // the normal code generation. 6276 if (!CGM.getLangOpts().OpenMPIsDevice) 6277 return false; 6278 6279 // Try to detect target regions in the function. 6280 scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD)); 6281 6282 // We should not emit any function other that the ones created during the 6283 // scanning. Therefore, we signal that this function is completely dealt 6284 // with. 
6285 return true; 6286 } 6287 6288 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 6289 if (!CGM.getLangOpts().OpenMPIsDevice) 6290 return false; 6291 6292 // Check if there are Ctors/Dtors in this declaration and look for target 6293 // regions in it. We use the complete variant to produce the kernel name 6294 // mangling. 6295 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 6296 if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 6297 for (auto *Ctor : RD->ctors()) { 6298 StringRef ParentName = 6299 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 6300 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 6301 } 6302 auto *Dtor = RD->getDestructor(); 6303 if (Dtor) { 6304 StringRef ParentName = 6305 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 6306 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 6307 } 6308 } 6309 6310 // If we are in target mode we do not emit any global (declare target is not 6311 // implemented yet). Therefore we signal that GD was processed in this case. 6312 return true; 6313 } 6314 6315 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 6316 auto *VD = GD.getDecl(); 6317 if (isa<FunctionDecl>(VD)) 6318 return emitTargetFunctions(GD); 6319 6320 return emitTargetGlobalVariable(GD); 6321 } 6322 6323 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { 6324 // If we have offloading in the current module, we need to emit the entries 6325 // now and register the offloading descriptor. 6326 createOffloadEntriesAndInfoMetadata(); 6327 6328 // Create and register the offloading binary descriptors. This is the main 6329 // entity that captures all the information about offloading in the current 6330 // compilation unit. 
6331 return createOffloadingBinaryDescriptorRegistration(); 6332 } 6333 6334 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 6335 const OMPExecutableDirective &D, 6336 SourceLocation Loc, 6337 llvm::Value *OutlinedFn, 6338 ArrayRef<llvm::Value *> CapturedVars) { 6339 if (!CGF.HaveInsertPoint()) 6340 return; 6341 6342 auto *RTLoc = emitUpdateLocation(CGF, Loc); 6343 CodeGenFunction::RunCleanupsScope Scope(CGF); 6344 6345 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 6346 llvm::Value *Args[] = { 6347 RTLoc, 6348 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 6349 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 6350 llvm::SmallVector<llvm::Value *, 16> RealArgs; 6351 RealArgs.append(std::begin(Args), std::end(Args)); 6352 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 6353 6354 auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 6355 CGF.EmitRuntimeCall(RTLFn, RealArgs); 6356 } 6357 6358 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 6359 const Expr *NumTeams, 6360 const Expr *ThreadLimit, 6361 SourceLocation Loc) { 6362 if (!CGF.HaveInsertPoint()) 6363 return; 6364 6365 auto *RTLoc = emitUpdateLocation(CGF, Loc); 6366 6367 llvm::Value *NumTeamsVal = 6368 (NumTeams) 6369 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 6370 CGF.CGM.Int32Ty, /* isSigned = */ true) 6371 : CGF.Builder.getInt32(0); 6372 6373 llvm::Value *ThreadLimitVal = 6374 (ThreadLimit) 6375 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 6376 CGF.CGM.Int32Ty, /* isSigned = */ true) 6377 : CGF.Builder.getInt32(0); 6378 6379 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 6380 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 6381 ThreadLimitVal}; 6382 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 6383 PushNumTeamsArgs); 6384 } 6385 6386 void CGOpenMPRuntime::emitTargetDataCalls( 6387 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 6388 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 6389 if (!CGF.HaveInsertPoint()) 6390 return; 6391 6392 // Action used to replace the default codegen action and turn privatization 6393 // off. 6394 PrePostActionTy NoPrivAction; 6395 6396 // Generate the code for the opening of the data environment. Capture all the 6397 // arguments of the runtime call by reference because they are used in the 6398 // closing of the region. 6399 auto &&BeginThenGen = [&D, Device, &Info, &CodeGen](CodeGenFunction &CGF, 6400 PrePostActionTy &) { 6401 // Fill up the arrays with all the mapped variables. 6402 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 6403 MappableExprsHandler::MapValuesArrayTy Pointers; 6404 MappableExprsHandler::MapValuesArrayTy Sizes; 6405 MappableExprsHandler::MapFlagsArrayTy MapTypes; 6406 6407 // Get map clause information. 6408 MappableExprsHandler MCHandler(D, CGF); 6409 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 6410 6411 // Fill up the arrays and create the arguments. 
6412 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 6413 6414 llvm::Value *BasePointersArrayArg = nullptr; 6415 llvm::Value *PointersArrayArg = nullptr; 6416 llvm::Value *SizesArrayArg = nullptr; 6417 llvm::Value *MapTypesArrayArg = nullptr; 6418 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 6419 SizesArrayArg, MapTypesArrayArg, Info); 6420 6421 // Emit device ID if any. 6422 llvm::Value *DeviceID = nullptr; 6423 if (Device) 6424 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 6425 CGF.Int32Ty, /*isSigned=*/true); 6426 else 6427 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 6428 6429 // Emit the number of elements in the offloading arrays. 6430 auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 6431 6432 llvm::Value *OffloadingArgs[] = { 6433 DeviceID, PointerNum, BasePointersArrayArg, 6434 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 6435 auto &RT = CGF.CGM.getOpenMPRuntime(); 6436 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin), 6437 OffloadingArgs); 6438 6439 // If device pointer privatization is required, emit the body of the region 6440 // here. It will have to be duplicated: with and without privatization. 6441 if (!Info.CaptureDeviceAddrMap.empty()) 6442 CodeGen(CGF); 6443 }; 6444 6445 // Generate code for the closing of the data region. 6446 auto &&EndThenGen = [Device, &Info](CodeGenFunction &CGF, PrePostActionTy &) { 6447 assert(Info.isValid() && "Invalid data environment closing arguments."); 6448 6449 llvm::Value *BasePointersArrayArg = nullptr; 6450 llvm::Value *PointersArrayArg = nullptr; 6451 llvm::Value *SizesArrayArg = nullptr; 6452 llvm::Value *MapTypesArrayArg = nullptr; 6453 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 6454 SizesArrayArg, MapTypesArrayArg, Info); 6455 6456 // Emit device ID if any. 
6457 llvm::Value *DeviceID = nullptr; 6458 if (Device) 6459 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 6460 CGF.Int32Ty, /*isSigned=*/true); 6461 else 6462 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 6463 6464 // Emit the number of elements in the offloading arrays. 6465 auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 6466 6467 llvm::Value *OffloadingArgs[] = { 6468 DeviceID, PointerNum, BasePointersArrayArg, 6469 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 6470 auto &RT = CGF.CGM.getOpenMPRuntime(); 6471 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end), 6472 OffloadingArgs); 6473 }; 6474 6475 // If we need device pointer privatization, we need to emit the body of the 6476 // region with no privatization in the 'else' branch of the conditional. 6477 // Otherwise, we don't have to do anything. 6478 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 6479 PrePostActionTy &) { 6480 if (!Info.CaptureDeviceAddrMap.empty()) { 6481 CodeGen.setAction(NoPrivAction); 6482 CodeGen(CGF); 6483 } 6484 }; 6485 6486 // We don't have to do anything to close the region if the if clause evaluates 6487 // to false. 6488 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 6489 6490 if (IfCond) { 6491 emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 6492 } else { 6493 RegionCodeGenTy RCG(BeginThenGen); 6494 RCG(CGF); 6495 } 6496 6497 // If we don't require privatization of device pointers, we emit the body in 6498 // between the runtime calls. This avoids duplicating the body code. 
6499 if (Info.CaptureDeviceAddrMap.empty()) { 6500 CodeGen.setAction(NoPrivAction); 6501 CodeGen(CGF); 6502 } 6503 6504 if (IfCond) { 6505 emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen); 6506 } else { 6507 RegionCodeGenTy RCG(EndThenGen); 6508 RCG(CGF); 6509 } 6510 } 6511 6512 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 6513 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 6514 const Expr *Device) { 6515 if (!CGF.HaveInsertPoint()) 6516 return; 6517 6518 assert((isa<OMPTargetEnterDataDirective>(D) || 6519 isa<OMPTargetExitDataDirective>(D) || 6520 isa<OMPTargetUpdateDirective>(D)) && 6521 "Expecting either target enter, exit data, or update directives."); 6522 6523 // Generate the code for the opening of the data environment. 6524 auto &&ThenGen = [&D, Device](CodeGenFunction &CGF, PrePostActionTy &) { 6525 // Fill up the arrays with all the mapped variables. 6526 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 6527 MappableExprsHandler::MapValuesArrayTy Pointers; 6528 MappableExprsHandler::MapValuesArrayTy Sizes; 6529 MappableExprsHandler::MapFlagsArrayTy MapTypes; 6530 6531 // Get map clause information. 6532 MappableExprsHandler MEHandler(D, CGF); 6533 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 6534 6535 // Fill up the arrays and create the arguments. 6536 TargetDataInfo Info; 6537 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 6538 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 6539 Info.PointersArray, Info.SizesArray, 6540 Info.MapTypesArray, Info); 6541 6542 // Emit device ID if any. 6543 llvm::Value *DeviceID = nullptr; 6544 if (Device) 6545 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 6546 CGF.Int32Ty, /*isSigned=*/true); 6547 else 6548 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 6549 6550 // Emit the number of elements in the offloading arrays. 
6551 auto *PointerNum = CGF.Builder.getInt32(BasePointers.size()); 6552 6553 llvm::Value *OffloadingArgs[] = { 6554 DeviceID, PointerNum, Info.BasePointersArray, 6555 Info.PointersArray, Info.SizesArray, Info.MapTypesArray}; 6556 6557 auto &RT = CGF.CGM.getOpenMPRuntime(); 6558 // Select the right runtime function call for each expected standalone 6559 // directive. 6560 OpenMPRTLFunction RTLFn; 6561 switch (D.getDirectiveKind()) { 6562 default: 6563 llvm_unreachable("Unexpected standalone target data directive."); 6564 break; 6565 case OMPD_target_enter_data: 6566 RTLFn = OMPRTL__tgt_target_data_begin; 6567 break; 6568 case OMPD_target_exit_data: 6569 RTLFn = OMPRTL__tgt_target_data_end; 6570 break; 6571 case OMPD_target_update: 6572 RTLFn = OMPRTL__tgt_target_data_update; 6573 break; 6574 } 6575 CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs); 6576 }; 6577 6578 // In the event we get an if clause, we don't have to take any action on the 6579 // else side. 6580 auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 6581 6582 if (IfCond) { 6583 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 6584 } else { 6585 RegionCodeGenTy ThenGenRCG(ThenGen); 6586 ThenGenRCG(CGF); 6587 } 6588 } 6589 6590 namespace { 6591 /// Kind of parameter in a function with 'declare simd' directive. 6592 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 6593 /// Attribute set of the parameter. 6594 struct ParamAttrTy { 6595 ParamKindTy Kind = Vector; 6596 llvm::APSInt StrideOrArg; 6597 llvm::APSInt Alignment; 6598 }; 6599 } // namespace 6600 6601 static unsigned evaluateCDTSize(const FunctionDecl *FD, 6602 ArrayRef<ParamAttrTy> ParamAttrs) { 6603 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 6604 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 6605 // of that clause. The VLEN value must be power of 2. 
6606 // In other case the notion of the function`s "characteristic data type" (CDT) 6607 // is used to compute the vector length. 6608 // CDT is defined in the following order: 6609 // a) For non-void function, the CDT is the return type. 6610 // b) If the function has any non-uniform, non-linear parameters, then the 6611 // CDT is the type of the first such parameter. 6612 // c) If the CDT determined by a) or b) above is struct, union, or class 6613 // type which is pass-by-value (except for the type that maps to the 6614 // built-in complex data type), the characteristic data type is int. 6615 // d) If none of the above three cases is applicable, the CDT is int. 6616 // The VLEN is then determined based on the CDT and the size of vector 6617 // register of that ISA for which current vector version is generated. The 6618 // VLEN is computed using the formula below: 6619 // VLEN = sizeof(vector_register) / sizeof(CDT), 6620 // where vector register size specified in section 3.2.1 Registers and the 6621 // Stack Frame of original AMD64 ABI document. 
6622 QualType RetType = FD->getReturnType(); 6623 if (RetType.isNull()) 6624 return 0; 6625 ASTContext &C = FD->getASTContext(); 6626 QualType CDT; 6627 if (!RetType.isNull() && !RetType->isVoidType()) 6628 CDT = RetType; 6629 else { 6630 unsigned Offset = 0; 6631 if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 6632 if (ParamAttrs[Offset].Kind == Vector) 6633 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 6634 ++Offset; 6635 } 6636 if (CDT.isNull()) { 6637 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 6638 if (ParamAttrs[I + Offset].Kind == Vector) { 6639 CDT = FD->getParamDecl(I)->getType(); 6640 break; 6641 } 6642 } 6643 } 6644 } 6645 if (CDT.isNull()) 6646 CDT = C.IntTy; 6647 CDT = CDT->getCanonicalTypeUnqualified(); 6648 if (CDT->isRecordType() || CDT->isUnionType()) 6649 CDT = C.IntTy; 6650 return C.getTypeSize(CDT); 6651 } 6652 6653 static void 6654 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 6655 const llvm::APSInt &VLENVal, 6656 ArrayRef<ParamAttrTy> ParamAttrs, 6657 OMPDeclareSimdDeclAttr::BranchStateTy State) { 6658 struct ISADataTy { 6659 char ISA; 6660 unsigned VecRegSize; 6661 }; 6662 ISADataTy ISAData[] = { 6663 { 6664 'b', 128 6665 }, // SSE 6666 { 6667 'c', 256 6668 }, // AVX 6669 { 6670 'd', 256 6671 }, // AVX2 6672 { 6673 'e', 512 6674 }, // AVX512 6675 }; 6676 llvm::SmallVector<char, 2> Masked; 6677 switch (State) { 6678 case OMPDeclareSimdDeclAttr::BS_Undefined: 6679 Masked.push_back('N'); 6680 Masked.push_back('M'); 6681 break; 6682 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 6683 Masked.push_back('N'); 6684 break; 6685 case OMPDeclareSimdDeclAttr::BS_Inbranch: 6686 Masked.push_back('M'); 6687 break; 6688 } 6689 for (auto Mask : Masked) { 6690 for (auto &Data : ISAData) { 6691 SmallString<256> Buffer; 6692 llvm::raw_svector_ostream Out(Buffer); 6693 Out << "_ZGV" << Data.ISA << Mask; 6694 if (!VLENVal) { 6695 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / 6696 evaluateCDTSize(FD, 
ParamAttrs)); 6697 } else 6698 Out << VLENVal; 6699 for (auto &ParamAttr : ParamAttrs) { 6700 switch (ParamAttr.Kind){ 6701 case LinearWithVarStride: 6702 Out << 's' << ParamAttr.StrideOrArg; 6703 break; 6704 case Linear: 6705 Out << 'l'; 6706 if (!!ParamAttr.StrideOrArg) 6707 Out << ParamAttr.StrideOrArg; 6708 break; 6709 case Uniform: 6710 Out << 'u'; 6711 break; 6712 case Vector: 6713 Out << 'v'; 6714 break; 6715 } 6716 if (!!ParamAttr.Alignment) 6717 Out << 'a' << ParamAttr.Alignment; 6718 } 6719 Out << '_' << Fn->getName(); 6720 Fn->addFnAttr(Out.str()); 6721 } 6722 } 6723 } 6724 6725 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 6726 llvm::Function *Fn) { 6727 ASTContext &C = CGM.getContext(); 6728 FD = FD->getCanonicalDecl(); 6729 // Map params to their positions in function decl. 6730 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 6731 if (isa<CXXMethodDecl>(FD)) 6732 ParamPositions.insert({FD, 0}); 6733 unsigned ParamPos = ParamPositions.size(); 6734 for (auto *P : FD->parameters()) { 6735 ParamPositions.insert({P->getCanonicalDecl(), ParamPos}); 6736 ++ParamPos; 6737 } 6738 for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 6739 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 6740 // Mark uniform parameters. 6741 for (auto *E : Attr->uniforms()) { 6742 E = E->IgnoreParenImpCasts(); 6743 unsigned Pos; 6744 if (isa<CXXThisExpr>(E)) 6745 Pos = ParamPositions[FD]; 6746 else { 6747 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 6748 ->getCanonicalDecl(); 6749 Pos = ParamPositions[PVD]; 6750 } 6751 ParamAttrs[Pos].Kind = Uniform; 6752 } 6753 // Get alignment info. 
6754 auto NI = Attr->alignments_begin(); 6755 for (auto *E : Attr->aligneds()) { 6756 E = E->IgnoreParenImpCasts(); 6757 unsigned Pos; 6758 QualType ParmTy; 6759 if (isa<CXXThisExpr>(E)) { 6760 Pos = ParamPositions[FD]; 6761 ParmTy = E->getType(); 6762 } else { 6763 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 6764 ->getCanonicalDecl(); 6765 Pos = ParamPositions[PVD]; 6766 ParmTy = PVD->getType(); 6767 } 6768 ParamAttrs[Pos].Alignment = 6769 (*NI) ? (*NI)->EvaluateKnownConstInt(C) 6770 : llvm::APSInt::getUnsigned( 6771 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 6772 .getQuantity()); 6773 ++NI; 6774 } 6775 // Mark linear parameters. 6776 auto SI = Attr->steps_begin(); 6777 auto MI = Attr->modifiers_begin(); 6778 for (auto *E : Attr->linears()) { 6779 E = E->IgnoreParenImpCasts(); 6780 unsigned Pos; 6781 if (isa<CXXThisExpr>(E)) 6782 Pos = ParamPositions[FD]; 6783 else { 6784 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 6785 ->getCanonicalDecl(); 6786 Pos = ParamPositions[PVD]; 6787 } 6788 auto &ParamAttr = ParamAttrs[Pos]; 6789 ParamAttr.Kind = Linear; 6790 if (*SI) { 6791 if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C, 6792 Expr::SE_AllowSideEffects)) { 6793 if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 6794 if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 6795 ParamAttr.Kind = LinearWithVarStride; 6796 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 6797 ParamPositions[StridePVD->getCanonicalDecl()]); 6798 } 6799 } 6800 } 6801 } 6802 ++SI; 6803 ++MI; 6804 } 6805 llvm::APSInt VLENVal; 6806 if (const Expr *VLEN = Attr->getSimdlen()) 6807 VLENVal = VLEN->EvaluateKnownConstInt(C); 6808 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 6809 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 6810 CGM.getTriple().getArch() == llvm::Triple::x86_64) 6811 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 6812 } 6813 } 6814 6815 namespace { 6816 /// 
Cleanup action for doacross support. 6817 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 6818 public: 6819 static const int DoacrossFinArgs = 2; 6820 6821 private: 6822 llvm::Value *RTLFn; 6823 llvm::Value *Args[DoacrossFinArgs]; 6824 6825 public: 6826 DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs) 6827 : RTLFn(RTLFn) { 6828 assert(CallArgs.size() == DoacrossFinArgs); 6829 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 6830 } 6831 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 6832 if (!CGF.HaveInsertPoint()) 6833 return; 6834 CGF.EmitRuntimeCall(RTLFn, Args); 6835 } 6836 }; 6837 } // namespace 6838 6839 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 6840 const OMPLoopDirective &D) { 6841 if (!CGF.HaveInsertPoint()) 6842 return; 6843 6844 ASTContext &C = CGM.getContext(); 6845 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 6846 RecordDecl *RD; 6847 if (KmpDimTy.isNull()) { 6848 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 6849 // kmp_int64 lo; // lower 6850 // kmp_int64 up; // upper 6851 // kmp_int64 st; // stride 6852 // }; 6853 RD = C.buildImplicitRecord("kmp_dim"); 6854 RD->startDefinition(); 6855 addFieldToRecordDecl(C, RD, Int64Ty); 6856 addFieldToRecordDecl(C, RD, Int64Ty); 6857 addFieldToRecordDecl(C, RD, Int64Ty); 6858 RD->completeDefinition(); 6859 KmpDimTy = C.getRecordType(RD); 6860 } else 6861 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 6862 6863 Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims"); 6864 CGF.EmitNullInitialization(DimsAddr, KmpDimTy); 6865 enum { LowerFD = 0, UpperFD, StrideFD }; 6866 // Fill dims with data. 
6867 LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy); 6868 // dims.upper = num_iterations; 6869 LValue UpperLVal = 6870 CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD)); 6871 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 6872 CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(), 6873 Int64Ty, D.getNumIterations()->getExprLoc()); 6874 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 6875 // dims.stride = 1; 6876 LValue StrideLVal = 6877 CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD)); 6878 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 6879 StrideLVal); 6880 6881 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 6882 // kmp_int32 num_dims, struct kmp_dim * dims); 6883 llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()), 6884 getThreadID(CGF, D.getLocStart()), 6885 llvm::ConstantInt::getSigned(CGM.Int32Ty, 1), 6886 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6887 DimsAddr.getPointer(), CGM.VoidPtrTy)}; 6888 6889 llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init); 6890 CGF.EmitRuntimeCall(RTLFn, Args); 6891 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 6892 emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())}; 6893 llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 6894 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 6895 llvm::makeArrayRef(FiniArgs)); 6896 } 6897 6898 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 6899 const OMPDependClause *C) { 6900 QualType Int64Ty = 6901 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 6902 const Expr *CounterVal = C->getCounterValue(); 6903 assert(CounterVal); 6904 llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal), 6905 CounterVal->getType(), Int64Ty, 6906 CounterVal->getExprLoc()); 6907 Address CntAddr = 
CGF.CreateMemTemp(Int64Ty, ".cnt.addr"); 6908 CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty); 6909 llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()), 6910 getThreadID(CGF, C->getLocStart()), 6911 CntAddr.getPointer()}; 6912 llvm::Value *RTLFn; 6913 if (C->getDependencyKind() == OMPC_DEPEND_source) 6914 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 6915 else { 6916 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 6917 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 6918 } 6919 CGF.EmitRuntimeCall(RTLFn, Args); 6920 } 6921 6922