1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This provides a class for OpenMP runtime code generation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGOpenMPRuntime.h" 17 #include "CodeGenFunction.h" 18 #include "clang/CodeGen/ConstantInitBuilder.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/StmtOpenMP.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/Bitcode/BitcodeReader.h" 23 #include "llvm/IR/CallSite.h" 24 #include "llvm/IR/DerivedTypes.h" 25 #include "llvm/IR/GlobalValue.h" 26 #include "llvm/IR/Value.h" 27 #include "llvm/Support/Format.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <cassert> 30 31 using namespace clang; 32 using namespace CodeGen; 33 34 namespace { 35 /// \brief Base class for handling code generation inside OpenMP regions. 36 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 37 public: 38 /// \brief Kinds of OpenMP regions used in codegen. 39 enum CGOpenMPRegionKind { 40 /// \brief Region with outlined function for standalone 'parallel' 41 /// directive. 42 ParallelOutlinedRegion, 43 /// \brief Region with outlined function for standalone 'task' directive. 44 TaskOutlinedRegion, 45 /// \brief Region for constructs that do not require function outlining, 46 /// like 'for', 'sections', 'atomic' etc. directives. 47 InlinedRegion, 48 /// \brief Region with outlined function for standalone 'target' directive. 
49 TargetRegion, 50 }; 51 52 CGOpenMPRegionInfo(const CapturedStmt &CS, 53 const CGOpenMPRegionKind RegionKind, 54 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 55 bool HasCancel) 56 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 57 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 58 59 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 60 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 61 bool HasCancel) 62 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 63 Kind(Kind), HasCancel(HasCancel) {} 64 65 /// \brief Get a variable or parameter for storing global thread id 66 /// inside OpenMP construct. 67 virtual const VarDecl *getThreadIDVariable() const = 0; 68 69 /// \brief Emit the captured statement body. 70 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 71 72 /// \brief Get an LValue for the current ThreadID variable. 73 /// \return LValue for thread id variable. This LValue always has type int32*. 74 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 75 76 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 77 78 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 79 80 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 81 82 bool hasCancel() const { return HasCancel; } 83 84 static bool classof(const CGCapturedStmtInfo *Info) { 85 return Info->getKind() == CR_OpenMP; 86 } 87 88 ~CGOpenMPRegionInfo() override = default; 89 90 protected: 91 CGOpenMPRegionKind RegionKind; 92 RegionCodeGenTy CodeGen; 93 OpenMPDirectiveKind Kind; 94 bool HasCancel; 95 }; 96 97 /// \brief API for captured statement code generation in OpenMP constructs. 
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 99 public: 100 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 101 const RegionCodeGenTy &CodeGen, 102 OpenMPDirectiveKind Kind, bool HasCancel, 103 StringRef HelperName) 104 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 105 HasCancel), 106 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 107 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 108 } 109 110 /// \brief Get a variable or parameter for storing global thread id 111 /// inside OpenMP construct. 112 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 113 114 /// \brief Get the name of the capture helper. 115 StringRef getHelperName() const override { return HelperName; } 116 117 static bool classof(const CGCapturedStmtInfo *Info) { 118 return CGOpenMPRegionInfo::classof(Info) && 119 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 120 ParallelOutlinedRegion; 121 } 122 123 private: 124 /// \brief A variable or parameter storing global thread id for OpenMP 125 /// constructs. 126 const VarDecl *ThreadIDVar; 127 StringRef HelperName; 128 }; 129 130 /// \brief API for captured statement code generation in OpenMP constructs. 131 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 132 public: 133 class UntiedTaskActionTy final : public PrePostActionTy { 134 bool Untied; 135 const VarDecl *PartIDVar; 136 const RegionCodeGenTy UntiedCodeGen; 137 llvm::SwitchInst *UntiedSwitch = nullptr; 138 139 public: 140 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 141 const RegionCodeGenTy &UntiedCodeGen) 142 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 143 void Enter(CodeGenFunction &CGF) override { 144 if (Untied) { 145 // Emit task switching point. 
146 auto PartIdLVal = CGF.EmitLoadOfPointerLValue( 147 CGF.GetAddrOfLocalVar(PartIDVar), 148 PartIDVar->getType()->castAs<PointerType>()); 149 auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation()); 150 auto *DoneBB = CGF.createBasicBlock(".untied.done."); 151 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 152 CGF.EmitBlock(DoneBB); 153 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 154 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 155 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 156 CGF.Builder.GetInsertBlock()); 157 emitUntiedSwitch(CGF); 158 } 159 } 160 void emitUntiedSwitch(CodeGenFunction &CGF) const { 161 if (Untied) { 162 auto PartIdLVal = CGF.EmitLoadOfPointerLValue( 163 CGF.GetAddrOfLocalVar(PartIDVar), 164 PartIDVar->getType()->castAs<PointerType>()); 165 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 166 PartIdLVal); 167 UntiedCodeGen(CGF); 168 CodeGenFunction::JumpDest CurPoint = 169 CGF.getJumpDestInCurrentScope(".untied.next."); 170 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 171 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 172 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 173 CGF.Builder.GetInsertBlock()); 174 CGF.EmitBranchThroughCleanup(CurPoint); 175 CGF.EmitBlock(CurPoint.getBlock()); 176 } 177 } 178 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } 179 }; 180 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 181 const VarDecl *ThreadIDVar, 182 const RegionCodeGenTy &CodeGen, 183 OpenMPDirectiveKind Kind, bool HasCancel, 184 const UntiedTaskActionTy &Action) 185 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 186 ThreadIDVar(ThreadIDVar), Action(Action) { 187 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 188 } 189 190 /// \brief Get a variable or parameter for storing global thread id 191 /// inside OpenMP construct. 
192 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 193 194 /// \brief Get an LValue for the current ThreadID variable. 195 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 196 197 /// \brief Get the name of the capture helper. 198 StringRef getHelperName() const override { return ".omp_outlined."; } 199 200 void emitUntiedSwitch(CodeGenFunction &CGF) override { 201 Action.emitUntiedSwitch(CGF); 202 } 203 204 static bool classof(const CGCapturedStmtInfo *Info) { 205 return CGOpenMPRegionInfo::classof(Info) && 206 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 207 TaskOutlinedRegion; 208 } 209 210 private: 211 /// \brief A variable or parameter storing global thread id for OpenMP 212 /// constructs. 213 const VarDecl *ThreadIDVar; 214 /// Action for emitting code for untied tasks. 215 const UntiedTaskActionTy &Action; 216 }; 217 218 /// \brief API for inlined captured statement code generation in OpenMP 219 /// constructs. 220 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 221 public: 222 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 223 const RegionCodeGenTy &CodeGen, 224 OpenMPDirectiveKind Kind, bool HasCancel) 225 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 226 OldCSI(OldCSI), 227 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 228 229 // \brief Retrieve the value of the context parameter. 230 llvm::Value *getContextValue() const override { 231 if (OuterRegionInfo) 232 return OuterRegionInfo->getContextValue(); 233 llvm_unreachable("No context value for inlined OpenMP region"); 234 } 235 236 void setContextValue(llvm::Value *V) override { 237 if (OuterRegionInfo) { 238 OuterRegionInfo->setContextValue(V); 239 return; 240 } 241 llvm_unreachable("No context value for inlined OpenMP region"); 242 } 243 244 /// \brief Lookup the captured field decl for a variable. 
245 const FieldDecl *lookup(const VarDecl *VD) const override { 246 if (OuterRegionInfo) 247 return OuterRegionInfo->lookup(VD); 248 // If there is no outer outlined region,no need to lookup in a list of 249 // captured variables, we can use the original one. 250 return nullptr; 251 } 252 253 FieldDecl *getThisFieldDecl() const override { 254 if (OuterRegionInfo) 255 return OuterRegionInfo->getThisFieldDecl(); 256 return nullptr; 257 } 258 259 /// \brief Get a variable or parameter for storing global thread id 260 /// inside OpenMP construct. 261 const VarDecl *getThreadIDVariable() const override { 262 if (OuterRegionInfo) 263 return OuterRegionInfo->getThreadIDVariable(); 264 return nullptr; 265 } 266 267 /// \brief Get the name of the capture helper. 268 StringRef getHelperName() const override { 269 if (auto *OuterRegionInfo = getOldCSI()) 270 return OuterRegionInfo->getHelperName(); 271 llvm_unreachable("No helper name for inlined OpenMP construct"); 272 } 273 274 void emitUntiedSwitch(CodeGenFunction &CGF) override { 275 if (OuterRegionInfo) 276 OuterRegionInfo->emitUntiedSwitch(CGF); 277 } 278 279 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 280 281 static bool classof(const CGCapturedStmtInfo *Info) { 282 return CGOpenMPRegionInfo::classof(Info) && 283 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 284 } 285 286 ~CGOpenMPInlinedRegionInfo() override = default; 287 288 private: 289 /// \brief CodeGen info about outer OpenMP region. 290 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 291 CGOpenMPRegionInfo *OuterRegionInfo; 292 }; 293 294 /// \brief API for captured statement code generation in OpenMP target 295 /// constructs. For this captures, implicit parameters are used instead of the 296 /// captured fields. The name of the target region has to be unique in a given 297 /// application so it is provided by the client, because only the client has 298 /// the information to generate that. 
299 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 300 public: 301 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 302 const RegionCodeGenTy &CodeGen, StringRef HelperName) 303 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 304 /*HasCancel=*/false), 305 HelperName(HelperName) {} 306 307 /// \brief This is unused for target regions because each starts executing 308 /// with a single thread. 309 const VarDecl *getThreadIDVariable() const override { return nullptr; } 310 311 /// \brief Get the name of the capture helper. 312 StringRef getHelperName() const override { return HelperName; } 313 314 static bool classof(const CGCapturedStmtInfo *Info) { 315 return CGOpenMPRegionInfo::classof(Info) && 316 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 317 } 318 319 private: 320 StringRef HelperName; 321 }; 322 323 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 324 llvm_unreachable("No codegen for expressions"); 325 } 326 /// \brief API for generation of expressions captured in a innermost OpenMP 327 /// region. 328 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 329 public: 330 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 331 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 332 OMPD_unknown, 333 /*HasCancel=*/false), 334 PrivScope(CGF) { 335 // Make sure the globals captured in the provided statement are local by 336 // using the privatization logic. We assume the same variable is not 337 // captured more than once. 
338 for (auto &C : CS.captures()) { 339 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 340 continue; 341 342 const VarDecl *VD = C.getCapturedVar(); 343 if (VD->isLocalVarDeclOrParm()) 344 continue; 345 346 DeclRefExpr DRE(const_cast<VarDecl *>(VD), 347 /*RefersToEnclosingVariableOrCapture=*/false, 348 VD->getType().getNonReferenceType(), VK_LValue, 349 SourceLocation()); 350 PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address { 351 return CGF.EmitLValue(&DRE).getAddress(); 352 }); 353 } 354 (void)PrivScope.Privatize(); 355 } 356 357 /// \brief Lookup the captured field decl for a variable. 358 const FieldDecl *lookup(const VarDecl *VD) const override { 359 if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 360 return FD; 361 return nullptr; 362 } 363 364 /// \brief Emit the captured statement body. 365 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 366 llvm_unreachable("No body for expressions"); 367 } 368 369 /// \brief Get a variable or parameter for storing global thread id 370 /// inside OpenMP construct. 371 const VarDecl *getThreadIDVariable() const override { 372 llvm_unreachable("No thread id for expressions"); 373 } 374 375 /// \brief Get the name of the capture helper. 376 StringRef getHelperName() const override { 377 llvm_unreachable("No helper name for expressions"); 378 } 379 380 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 381 382 private: 383 /// Private scope to capture global variables. 384 CodeGenFunction::OMPPrivateScope PrivScope; 385 }; 386 387 /// \brief RAII for emitting code of OpenMP constructs. 388 class InlinedOpenMPRegionRAII { 389 CodeGenFunction &CGF; 390 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 391 FieldDecl *LambdaThisCaptureField = nullptr; 392 393 public: 394 /// \brief Constructs region for combined constructs. 395 /// \param CodeGen Code generation sequence for combined directives. 
Includes 396 /// a list of functions used for code generation of implicitly inlined 397 /// regions. 398 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 399 OpenMPDirectiveKind Kind, bool HasCancel) 400 : CGF(CGF) { 401 // Start emission for the construct. 402 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 403 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 404 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 405 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 406 CGF.LambdaThisCaptureField = nullptr; 407 } 408 409 ~InlinedOpenMPRegionRAII() { 410 // Restore original CapturedStmtInfo only if we're done with code emission. 411 auto *OldCSI = 412 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 413 delete CGF.CapturedStmtInfo; 414 CGF.CapturedStmtInfo = OldCSI; 415 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 416 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 417 } 418 }; 419 420 /// \brief Values for bit flags used in the ident_t to describe the fields. 421 /// All enumeric elements are named and described in accordance with the code 422 /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h 423 enum OpenMPLocationFlags { 424 /// \brief Use trampoline for internal microtask. 425 OMP_IDENT_IMD = 0x01, 426 /// \brief Use c-style ident structure. 427 OMP_IDENT_KMPC = 0x02, 428 /// \brief Atomic reduction option for kmpc_reduce. 429 OMP_ATOMIC_REDUCE = 0x10, 430 /// \brief Explicit 'barrier' directive. 431 OMP_IDENT_BARRIER_EXPL = 0x20, 432 /// \brief Implicit barrier in code. 433 OMP_IDENT_BARRIER_IMPL = 0x40, 434 /// \brief Implicit barrier in 'for' directive. 435 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 436 /// \brief Implicit barrier in 'sections' directive. 437 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 438 /// \brief Implicit barrier in 'single' directive. 
439 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140 440 }; 441 442 /// \brief Describes ident structure that describes a source location. 443 /// All descriptions are taken from 444 /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h 445 /// Original structure: 446 /// typedef struct ident { 447 /// kmp_int32 reserved_1; /**< might be used in Fortran; 448 /// see above */ 449 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 450 /// KMP_IDENT_KMPC identifies this union 451 /// member */ 452 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 453 /// see above */ 454 ///#if USE_ITT_BUILD 455 /// /* but currently used for storing 456 /// region-specific ITT */ 457 /// /* contextual information. */ 458 ///#endif /* USE_ITT_BUILD */ 459 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 460 /// C++ */ 461 /// char const *psource; /**< String describing the source location. 462 /// The string is composed of semi-colon separated 463 // fields which describe the source file, 464 /// the function and a pair of line numbers that 465 /// delimit the construct. 466 /// */ 467 /// } ident_t; 468 enum IdentFieldIndex { 469 /// \brief might be used in Fortran 470 IdentField_Reserved_1, 471 /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 472 IdentField_Flags, 473 /// \brief Not really used in Fortran any more 474 IdentField_Reserved_2, 475 /// \brief Source[4] in Fortran, do not use for C++ 476 IdentField_Reserved_3, 477 /// \brief String describing the source location. The string is composed of 478 /// semi-colon separated fields which describe the source file, the function 479 /// and a pair of line numbers that delimit the construct. 480 IdentField_PSource 481 }; 482 483 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from 484 /// the enum sched_type in kmp.h). 485 enum OpenMPSchedType { 486 /// \brief Lower bound for default (unordered) versions. 
487 OMP_sch_lower = 32, 488 OMP_sch_static_chunked = 33, 489 OMP_sch_static = 34, 490 OMP_sch_dynamic_chunked = 35, 491 OMP_sch_guided_chunked = 36, 492 OMP_sch_runtime = 37, 493 OMP_sch_auto = 38, 494 /// static with chunk adjustment (e.g., simd) 495 OMP_sch_static_balanced_chunked = 45, 496 /// \brief Lower bound for 'ordered' versions. 497 OMP_ord_lower = 64, 498 OMP_ord_static_chunked = 65, 499 OMP_ord_static = 66, 500 OMP_ord_dynamic_chunked = 67, 501 OMP_ord_guided_chunked = 68, 502 OMP_ord_runtime = 69, 503 OMP_ord_auto = 70, 504 OMP_sch_default = OMP_sch_static, 505 /// \brief dist_schedule types 506 OMP_dist_sch_static_chunked = 91, 507 OMP_dist_sch_static = 92, 508 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 509 /// Set if the monotonic schedule modifier was present. 510 OMP_sch_modifier_monotonic = (1 << 29), 511 /// Set if the nonmonotonic schedule modifier was present. 512 OMP_sch_modifier_nonmonotonic = (1 << 30), 513 }; 514 515 enum OpenMPRTLFunction { 516 /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, 517 /// kmpc_micro microtask, ...); 518 OMPRTL__kmpc_fork_call, 519 /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc, 520 /// kmp_int32 global_tid, void *data, size_t size, void ***cache); 521 OMPRTL__kmpc_threadprivate_cached, 522 /// \brief Call to void __kmpc_threadprivate_register( ident_t *, 523 /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 524 OMPRTL__kmpc_threadprivate_register, 525 // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); 526 OMPRTL__kmpc_global_thread_num, 527 // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 528 // kmp_critical_name *crit); 529 OMPRTL__kmpc_critical, 530 // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 531 // global_tid, kmp_critical_name *crit, uintptr_t hint); 532 OMPRTL__kmpc_critical_with_hint, 533 // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 534 // 
kmp_critical_name *crit); 535 OMPRTL__kmpc_end_critical, 536 // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 537 // global_tid); 538 OMPRTL__kmpc_cancel_barrier, 539 // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 540 OMPRTL__kmpc_barrier, 541 // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 542 OMPRTL__kmpc_for_static_fini, 543 // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 544 // global_tid); 545 OMPRTL__kmpc_serialized_parallel, 546 // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 547 // global_tid); 548 OMPRTL__kmpc_end_serialized_parallel, 549 // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 550 // kmp_int32 num_threads); 551 OMPRTL__kmpc_push_num_threads, 552 // Call to void __kmpc_flush(ident_t *loc); 553 OMPRTL__kmpc_flush, 554 // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid); 555 OMPRTL__kmpc_master, 556 // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); 557 OMPRTL__kmpc_end_master, 558 // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 559 // int end_part); 560 OMPRTL__kmpc_omp_taskyield, 561 // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); 562 OMPRTL__kmpc_single, 563 // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); 564 OMPRTL__kmpc_end_single, 565 // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 566 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 567 // kmp_routine_entry_t *task_entry); 568 OMPRTL__kmpc_omp_task_alloc, 569 // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * 570 // new_task); 571 OMPRTL__kmpc_omp_task, 572 // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 573 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 574 // kmp_int32 didit); 575 OMPRTL__kmpc_copyprivate, 576 // Call to kmp_int32 
__kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 577 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 578 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 579 OMPRTL__kmpc_reduce, 580 // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 581 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 582 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 583 // *lck); 584 OMPRTL__kmpc_reduce_nowait, 585 // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 586 // kmp_critical_name *lck); 587 OMPRTL__kmpc_end_reduce, 588 // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 589 // kmp_critical_name *lck); 590 OMPRTL__kmpc_end_reduce_nowait, 591 // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 592 // kmp_task_t * new_task); 593 OMPRTL__kmpc_omp_task_begin_if0, 594 // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 595 // kmp_task_t * new_task); 596 OMPRTL__kmpc_omp_task_complete_if0, 597 // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 598 OMPRTL__kmpc_ordered, 599 // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 600 OMPRTL__kmpc_end_ordered, 601 // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 602 // global_tid); 603 OMPRTL__kmpc_omp_taskwait, 604 // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 605 OMPRTL__kmpc_taskgroup, 606 // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 607 OMPRTL__kmpc_end_taskgroup, 608 // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 609 // int proc_bind); 610 OMPRTL__kmpc_push_proc_bind, 611 // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 612 // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t 613 // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 614 
OMPRTL__kmpc_omp_task_with_deps, 615 // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 616 // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 617 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 618 OMPRTL__kmpc_omp_wait_deps, 619 // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 620 // global_tid, kmp_int32 cncl_kind); 621 OMPRTL__kmpc_cancellationpoint, 622 // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 623 // kmp_int32 cncl_kind); 624 OMPRTL__kmpc_cancel, 625 // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 626 // kmp_int32 num_teams, kmp_int32 thread_limit); 627 OMPRTL__kmpc_push_num_teams, 628 // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 629 // microtask, ...); 630 OMPRTL__kmpc_fork_teams, 631 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 632 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 633 // sched, kmp_uint64 grainsize, void *task_dup); 634 OMPRTL__kmpc_taskloop, 635 // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 636 // num_dims, struct kmp_dim *dims); 637 OMPRTL__kmpc_doacross_init, 638 // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 639 OMPRTL__kmpc_doacross_fini, 640 // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 641 // *vec); 642 OMPRTL__kmpc_doacross_post, 643 // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 644 // *vec); 645 OMPRTL__kmpc_doacross_wait, 646 // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void 647 // *data); 648 OMPRTL__kmpc_task_reduction_init, 649 // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 650 // *d); 651 OMPRTL__kmpc_task_reduction_get_th_data, 652 653 // 654 // Offloading related calls 655 // 656 // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t 657 // 
arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t 658 // *arg_types); 659 OMPRTL__tgt_target, 660 // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, 661 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 662 // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); 663 OMPRTL__tgt_target_teams, 664 // Call to void __tgt_register_lib(__tgt_bin_desc *desc); 665 OMPRTL__tgt_register_lib, 666 // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); 667 OMPRTL__tgt_unregister_lib, 668 // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, 669 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 670 OMPRTL__tgt_target_data_begin, 671 // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num, 672 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 673 OMPRTL__tgt_target_data_end, 674 // Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num, 675 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 676 OMPRTL__tgt_target_data_update, 677 }; 678 679 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 680 /// region. 
681 class CleanupTy final : public EHScopeStack::Cleanup { 682 PrePostActionTy *Action; 683 684 public: 685 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 686 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 687 if (!CGF.HaveInsertPoint()) 688 return; 689 Action->Exit(CGF); 690 } 691 }; 692 693 } // anonymous namespace 694 695 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 696 CodeGenFunction::RunCleanupsScope Scope(CGF); 697 if (PrePostAction) { 698 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 699 Callback(CodeGen, CGF, *PrePostAction); 700 } else { 701 PrePostActionTy Action; 702 Callback(CodeGen, CGF, Action); 703 } 704 } 705 706 /// Check if the combiner is a call to UDR combiner and if it is so return the 707 /// UDR decl used for reduction. 708 static const OMPDeclareReductionDecl * 709 getReductionInit(const Expr *ReductionOp) { 710 if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) 711 if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 712 if (auto *DRE = 713 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 714 if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 715 return DRD; 716 return nullptr; 717 } 718 719 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 720 const OMPDeclareReductionDecl *DRD, 721 const Expr *InitOp, 722 Address Private, Address Original, 723 QualType Ty) { 724 if (DRD->getInitializer()) { 725 std::pair<llvm::Function *, llvm::Function *> Reduction = 726 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 727 auto *CE = cast<CallExpr>(InitOp); 728 auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 729 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 730 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 731 auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 732 auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 733 
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() -> Address { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() -> Address { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, ".init");
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// \brief Emit initialization of arrays of complex types.
///
/// Emits a loop that walks the destination array one element at a time and
/// initializes each element. When \p DRD is non-null the source array is
/// walked in lockstep with the destination.
/// \param CGF Function the loop is emitted into.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, or null. When non-null and it
/// either has an initializer or \p Init is null, each element is initialized
/// through emitInitWithReductionInitializer(); otherwise \p Init is emitted
/// directly into each element.
/// \param SrcAddr Address of the original array. Only read when \p DRD is
/// non-null.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  // emitArrayLength() also reports the base element type through ElementTy.
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  // The source is only meaningful for user-defined reductions; give it the
  // same element type as the destination.
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  // The emptiness test below guards the loop; inside the loop the exit
  // condition is evaluated at the bottom of the body.
  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointers. Each
  // has two incoming edges: the entry block (array begin) and the loop
  // back-edge (next element), which is added after the body is emitted.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Cleanups created while initializing one element are run before the
    // pointers are advanced.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (DRD && (DRD->getInitializer() || !Init)) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the source-side GEP is given the same
    // "omp.arraycpy.dest.element" name as the destination-side GEP below.
    auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit an lvalue for the shared (original) reduction item \p E.
///
/// Array sections and array subscripts are emitted directly. Anything else
/// must be a DeclRefExpr naming a VarDecl (enforced by cast<>); for it a
/// temporary DeclRefExpr is built and emitted.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE);
  if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E))
    return CGF.EmitLValue(ASE);
  auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
  // The second constructor argument is true only when a captured-statement
  // info is active and its lookup() finds an entry for the variable.
  DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                  CGF.CapturedStmtInfo &&
                      CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                  E->getType(), VK_LValue, E->getExprLoc());
  // Store the address of the original variable associated with the LHS
  // implicit variable.
return CGF.EmitLValue(&DRE);
}

/// Emit the upper-bound lvalue for the shared reduction item \p E.
///
/// For an array section this is the result of EmitOMPArraySectionExpr() with
/// IsLowerBound=false; for every other expression a default-constructed
/// LValue is returned.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

/// Initialize the private copy of the N-th reduction item element by element
/// via EmitOMPAggregateInit().
///
/// With a non-null \p DRD the item's ReductionOp is passed as the init
/// expression; otherwise the private variable's own initializer is used.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       DRD ? ClausesData[N].ReductionOp : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress());
}

/// Build the per-item clause data from three parallel lists.
///
/// One ClausesData entry is created per element of \p Shareds, pairing it
/// with the element at the same position in \p Privates and \p ReductionOps.
/// Both of those iterators are dereferenced once per shared item with no
/// bounds check, so the lists must be at least as long as \p Shareds.
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  auto IPriv = Privates.begin();
  auto IRed = ReductionOps.begin();
  for (const auto *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IPriv, *IRed);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

/// Emit and record the shared lvalue pair (lvalue, upper-bound lvalue) for
/// the N-th reduction item.
///
/// Must be called for N = 0, 1, 2, ... in order: the assert requires that
/// exactly N entries have been recorded so far.
void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  SharedAddresses.emplace_back(emitSharedLValue(CGF, ClausesData[N].Ref),
                               emitSharedLValueUB(CGF, ClausesData[N].Ref));
}

/// Compute and record the size of the N-th reduction item, and emit its
/// private type if that type is variably modified.
///
/// Appends one (size in chars, element count) pair to Sizes. For items that
/// are neither array sections nor of variably modified type the element
/// count is null and nothing else is emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!AsArraySection && !PrivateType->isVariablyModifiedType()) {
    // Fixed-size item: only the byte size is recorded.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  llvm::Type *ElemType =
      cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (upper-bound pointer - lower-bound pointer) + 1.
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
                                     SharedAddresses[N].first.getPointer());
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Variably modified type: derive the element count from the byte size.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the private type's size expression (cast<> requires it to be an
  // OpaqueValueExpr) to the computed element count while the type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit the private type of the N-th reduction item using an element count
/// supplied by the caller.
///
/// Unlike the two-argument overload this does not append to Sizes. For items
/// that are neither array sections nor of variably modified type, \p Size
/// and the recorded element count are asserted to be null and nothing is
/// emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!AsArraySection && !PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified redution "
           "items.");
    return;
  }
  // Bind the private type's size expression to \p Size while the type is
  // emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit initialization of the private copy of the N-th reduction item.
///
/// Both addresses are first cast to the memory types of the private and
/// shared items. Then exactly one strategy is chosen, in this order:
///  1. array sections and array-typed privates: element-wise initialization;
///  2. a user-defined reduction that has an initializer, or whose private
///     variable has no initializer: emitInitWithReductionInitializer();
///  3. otherwise \p DefaultInit is invoked; only if it returns false and the
///     private variable has a non-trivial initializer is that initializer
///     emitted into the private copy.
/// \param DefaultInit Callback tried in case 3; returning true suppresses
/// the emission of the private variable's own initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo());
  if (isa<OMPArraySectionExpr>(ClausesData[N].Ref) ||
      CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Return true if the type of the N-th private copy has a destruction kind
/// other than QualType::DK_none.
bool ReductionCodeGen::needCleanups(unsigned N) {
  auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

/// Push a destroy cleanup for the N-th private copy at \p PrivateAddr when
/// needCleanups(N) reports that its type requires destruction.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  auto
*PrivateVD = 1013 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1014 QualType PrivateType = PrivateVD->getType(); 1015 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1016 if (needCleanups(N)) { 1017 PrivateAddr = CGF.Builder.CreateElementBitCast( 1018 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1019 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 1020 } 1021 } 1022 1023 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1024 LValue BaseLV) { 1025 BaseTy = BaseTy.getNonReferenceType(); 1026 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1027 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1028 if (auto *PtrTy = BaseTy->getAs<PointerType>()) 1029 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); 1030 else { 1031 BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(), 1032 BaseTy->castAs<ReferenceType>()); 1033 } 1034 BaseTy = BaseTy->getPointeeType(); 1035 } 1036 return CGF.MakeAddrLValue( 1037 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(), 1038 CGF.ConvertTypeForMem(ElTy)), 1039 BaseLV.getType(), BaseLV.getBaseInfo()); 1040 } 1041 1042 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1043 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 1044 llvm::Value *Addr) { 1045 Address Tmp = Address::invalid(); 1046 Address TopTmp = Address::invalid(); 1047 Address MostTopTmp = Address::invalid(); 1048 BaseTy = BaseTy.getNonReferenceType(); 1049 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1050 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1051 Tmp = CGF.CreateMemTemp(BaseTy); 1052 if (TopTmp.isValid()) 1053 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 1054 else 1055 MostTopTmp = Tmp; 1056 TopTmp = Tmp; 1057 BaseTy = BaseTy->getPointeeType(); 1058 } 1059 llvm::Type *Ty = BaseLVType; 1060 if (Tmp.isValid()) 1061 Ty = Tmp.getElementType(); 1062 Addr = 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 1063 if (Tmp.isValid()) { 1064 CGF.Builder.CreateStore(Addr, Tmp); 1065 return MostTopTmp; 1066 } 1067 return Address(Addr, BaseLVAlignment); 1068 } 1069 1070 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1071 Address PrivateAddr) { 1072 const DeclRefExpr *DE; 1073 const VarDecl *OrigVD = nullptr; 1074 if (auto *OASE = dyn_cast<OMPArraySectionExpr>(ClausesData[N].Ref)) { 1075 auto *Base = OASE->getBase()->IgnoreParenImpCasts(); 1076 while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 1077 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 1078 while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1079 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1080 DE = cast<DeclRefExpr>(Base); 1081 OrigVD = cast<VarDecl>(DE->getDecl()); 1082 } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(ClausesData[N].Ref)) { 1083 auto *Base = ASE->getBase()->IgnoreParenImpCasts(); 1084 while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1085 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1086 DE = cast<DeclRefExpr>(Base); 1087 OrigVD = cast<VarDecl>(DE->getDecl()); 1088 } 1089 if (OrigVD) { 1090 BaseDecls.emplace_back(OrigVD); 1091 auto OriginalBaseLValue = CGF.EmitLValue(DE); 1092 LValue BaseLValue = 1093 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1094 OriginalBaseLValue); 1095 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1096 BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); 1097 llvm::Value *Ptr = 1098 CGF.Builder.CreateGEP(PrivateAddr.getPointer(), Adjustment); 1099 return castToBase(CGF, OrigVD->getType(), 1100 SharedAddresses[N].first.getType(), 1101 OriginalBaseLValue.getPointer()->getType(), 1102 OriginalBaseLValue.getAlignment(), Ptr); 1103 } 1104 BaseDecls.emplace_back( 1105 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1106 return PrivateAddr; 1107 } 1108 1109 bool 
ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  // DRD is null when the reduction op is not a user-defined reduction.
  auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

/// Default implementation: the thread id variable is a pointer (see the
/// castAs<PointerType>()), so load it and return an lvalue for the pointee.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

/// Emit the region body by invoking the stored CodeGen callback.
///
/// Does nothing when there is no insertion point. The callback runs with a
/// terminate scope pushed on the EH stack.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

/// Task regions: return an lvalue for the thread id local variable itself,
/// without the pointer load done by the base-class implementation.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            LValueBaseInfo(AlignmentSource::Decl, false));
}

/// Create the LLVM types shared by all runtime calls and load the offload
/// info metadata.
///
/// ident_t is created as a struct of four i32 fields followed by an i8*
/// (psource); the critical-name type is an array of eight i32.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OffloadEntriesInfoManager(CGM) {
  IdentTy = llvm::StructType::create(
      "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
      CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
      CGM.Int8PtrTy /* psource */);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}

/// Drop all entries from InternalVars.
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
}

/// Emit the helper function for a user-defined reduction's combiner or
/// initializer expression.
///
/// The helper has internal linkage, returns void and takes two restrict
/// pointers to \p Ty: first the one bound to \p Out, then the one bound to
/// \p In. Inside it, \p In and \p Out are privatized to the pointees of those
/// parameters and \p CombinerInitializer is emitted as an ignored expression.
/// NoInline/OptimizeNone are removed and AlwaysInline is added.
/// \param IsCombiner Selects the function name: ".omp_combiner." when true,
/// ".omp_initializer." otherwise.
/// \return The newly created function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  auto &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  // Parameter order is (out, in).
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  auto *Fn = llvm::Function::Create(
      FnTy, llvm::GlobalValue::InternalLinkage,
      IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::NoInline);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit the combiner (and, if present, the initializer) helper functions for
/// the declare-reduction \p D and record them in UDRMap.
///
/// Does nothing if \p D already has an entry. The helper variables are found
/// by looking up "omp_in"/"omp_out" (combiner) and "omp_orig"/"omp_priv"
/// (initializer) inside \p D; the identifiers are cached in members on first
/// use.
/// \param CGF If non-null, \p D is also recorded against CGF->CurFn in
/// FunctionUDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  auto &C = CGM.getContext();
  if (!In || !Out) {
    In = &C.Idents.get("omp_in");
    Out = &C.Idents.get("omp_out");
  }
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
      cast<VarDecl>(D->lookup(Out).front()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (auto *Init = D->getInitializer()) {
    if (!Priv || !Orig) {
      Priv = &C.Idents.get("omp_priv");
      Orig = &C.Idents.get("omp_orig");
    }
    // For the initializer, omp_orig plays the "in" role and omp_priv the
    // "out" role.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()),
        cast<VarDecl>(D->lookup(Priv).front()),
        /*IsCombiner=*/false);
  }
  UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Return the (combiner, initializer) function pair for \p D, emitting the
/// functions first if they are not in UDRMap yet. The initializer is null
/// when \p D has none.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

// Layout information for ident_t.

/// Alignment used for ident_t objects: the target's pointer alignment.
static CharUnits getIdentAlign(CodeGenModule &CGM) {
  return CGM.getPointerAlign();
}
/// Size of ident_t: 16 bytes plus one pointer.
static CharUnits getIdentSize(CodeGenModule &CGM) {
  // NOTE(review): the assert tests 4 * pointer size, whereas the size
  // returned below relies on the 16 bytes of i32 fields being followed
  // directly by the pointer. Confirm whether the intended check is that 16
  // is a multiple of the pointer alignment.
  assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
  return CharUnits::fromQuantity(16) + CGM.getPointerSize();
}
/// Byte offset of \p Field inside ident_t, computed as 4 * field index.
static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
  // All the fields except the last are i32, so this works beautifully.
  return unsigned(Field) * CharUnits::fromQuantity(4);
}
/// Create a struct GEP to field \p Field of the ident_t object at \p Addr.
static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
                                   IdentFieldIndex Field,
                                   const llvm::Twine &Name = "") {
  auto Offset = getOffsetOfIdentField(Field);
  return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
}

/// Outline the captured statement \p CS of directive \p D into a function,
/// shared by the 'parallel' and 'teams' entry points.
///
/// \param ThreadIDVar Thread id variable; asserted to have pointer type.
/// \param InnermostKind Directive kind stored in the region info.
/// \param OutlinedHelperName Name passed to the outlined region info.
/// \param CodeGen Callback that emits the region body.
/// \return The outlined function. The region is marked as having a cancel
/// only if \p D is a parallel, parallel sections or parallel for directive
/// whose hasCancel() is true.
static llvm::Value *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  // Install CGInfo as the captured-statement info of CGF for the duration of
  // the outlining.
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}
/// Outline the OMPD_parallel captured statement of \p D into a function,
/// using the common parallel/teams helper.
llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Outline the OMPD_teams captured statement of \p D into a function, using
/// the common parallel/teams helper.
llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Outline the statement associated with task-like directive \p D.
///
/// \param ThreadIDVar Thread id variable; asserted NOT to be a pointer.
/// \param PartIDVar Part id variable handed to the untied-task action.
/// \param TaskTVar Variable holding the task pointer; it is loaded and passed
/// to the runtime by the untied-task callback.
/// \param InnermostKind Directive kind stored in the region info.
/// \param CodeGen Body generator; the untied-task action is installed on it.
/// \param Tied Whether the task is tied.
/// \param NumberOfParts Written only when \p Tied is false, with the number
/// of parts reported by the action; left untouched otherwise.
/// \return The outlined function.
llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Callback given to the untied-task action: emits a runtime call to the
  // OMPRTL__kmpc_omp_task entry with (location, thread id, task pointer).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    auto *ThreadID = getThreadID(CGF, D.getLocStart());
    auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
  // Only OMPTaskDirective is queried for cancel; other directive kinds are
  // treated as having none.
  auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

/// Return the address of the constant default ident_t global for \p Flags,
/// creating and caching it in OpenMPDefaultLocMap on first request.
///
/// The global has private linkage, unnamed_addr, and is initialized to
/// {0, Flags, 0, 0, psource}, where psource is the shared default source
/// string ";unknown;unknown;0;0;;".
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = getIdentAlign(CGM);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Field order must match the ident_t struct type created in the
    // constructor: four i32 values, then psource.
    ConstantInitBuilder builder(CGM);
    auto fields = builder.beginStruct(IdentTy);
    fields.addInt(CGM.Int32Ty, 0);
    fields.addInt(CGM.Int32Ty, Flags);
    fields.addInt(CGM.Int32Ty, 0);
    fields.addInt(CGM.Int32Ty, 0);
    fields.add(DefaultOpenMPPSource);
    auto DefaultOpenMPLocation =
      fields.finishAndCreateGlobal("", Align, /*isConstant*/ true,
                                   llvm::GlobalValue::PrivateLinkage);
    DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}

/// Return a pointer to an ident_t describing \p Loc, for passing to runtime
/// calls.
///
/// OMP_IDENT_KMPC is always added to \p Flags. Without debug info, or for an
/// invalid \p Loc, the shared default location for those flags is returned.
/// Otherwise a per-function ".kmpc_loc.addr" temporary is used (created and
/// copied from the default location on first use) and its psource field is
/// set to a ";file;function;line;column;;" string for \p Loc.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
1367 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1368 Loc.isInvalid()) 1369 return getOrCreateDefaultLocation(Flags).getPointer(); 1370 1371 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1372 1373 Address LocValue = Address::invalid(); 1374 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1375 if (I != OpenMPLocThreadIDMap.end()) 1376 LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM)); 1377 1378 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 1379 // GetOpenMPThreadID was called before this routine. 1380 if (!LocValue.isValid()) { 1381 // Generate "ident_t .kmpc_loc.addr;" 1382 Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM), 1383 ".kmpc_loc.addr"); 1384 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1385 Elem.second.DebugLoc = AI.getPointer(); 1386 LocValue = AI; 1387 1388 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1389 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 1390 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 1391 CGM.getSize(getIdentSize(CGF.CGM))); 1392 } 1393 1394 // char **psource = &.kmpc_loc_<flags>.addr.psource; 1395 Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource); 1396 1397 auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 1398 if (OMPDebugLoc == nullptr) { 1399 SmallString<128> Buffer2; 1400 llvm::raw_svector_ostream OS2(Buffer2); 1401 // Build debug location 1402 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1403 OS2 << ";" << PLoc.getFilename() << ";"; 1404 if (const FunctionDecl *FD = 1405 dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) { 1406 OS2 << FD->getQualifiedNameAsString(); 1407 } 1408 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1409 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 1410 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 1411 } 1412 // *psource = 
";<File>;<Function>;<Line>;<Column>;;"; 1413 CGF.Builder.CreateStore(OMPDebugLoc, PSource); 1414 1415 // Our callers always pass this to a runtime function, so for 1416 // convenience, go ahead and return a naked pointer. 1417 return LocValue.getPointer(); 1418 } 1419 1420 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1421 SourceLocation Loc) { 1422 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1423 1424 llvm::Value *ThreadID = nullptr; 1425 // Check whether we've already cached a load of the thread id in this 1426 // function. 1427 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1428 if (I != OpenMPLocThreadIDMap.end()) { 1429 ThreadID = I->second.ThreadID; 1430 if (ThreadID != nullptr) 1431 return ThreadID; 1432 } 1433 if (auto *OMPRegionInfo = 1434 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1435 if (OMPRegionInfo->getThreadIDVariable()) { 1436 // Check if this an outlined function with thread id passed as argument. 1437 auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1438 ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); 1439 // If value loaded in entry block, cache it and use it everywhere in 1440 // function. 1441 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 1442 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1443 Elem.second.ThreadID = ThreadID; 1444 } 1445 return ThreadID; 1446 } 1447 } 1448 1449 // This is not an outlined function region - need to call __kmpc_int32 1450 // kmpc_global_thread_num(ident_t *loc). 1451 // Generate thread id value and cache this value for use across the 1452 // function. 
1453 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1454 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 1455 ThreadID = 1456 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1457 emitUpdateLocation(CGF, Loc)); 1458 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1459 Elem.second.ThreadID = ThreadID; 1460 return ThreadID; 1461 } 1462 1463 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1464 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1465 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 1466 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1467 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1468 for(auto *D : FunctionUDRMap[CGF.CurFn]) { 1469 UDRMap.erase(D); 1470 } 1471 FunctionUDRMap.erase(CGF.CurFn); 1472 } 1473 } 1474 1475 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1476 if (!IdentTy) { 1477 } 1478 return llvm::PointerType::getUnqual(IdentTy); 1479 } 1480 1481 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1482 if (!Kmpc_MicroTy) { 1483 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1484 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1485 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1486 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1487 } 1488 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1489 } 1490 1491 llvm::Constant * 1492 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1493 llvm::Constant *RTLFn = nullptr; 1494 switch (static_cast<OpenMPRTLFunction>(Function)) { 1495 case OMPRTL__kmpc_fork_call: { 1496 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1497 // microtask, ...); 1498 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1499 getKmpc_MicroPointerTy()}; 1500 llvm::FunctionType *FnTy = 1501 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1502 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1503 break; 1504 } 1505 case OMPRTL__kmpc_global_thread_num: { 1506 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1507 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1508 llvm::FunctionType *FnTy = 1509 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1510 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1511 break; 1512 } 1513 case OMPRTL__kmpc_threadprivate_cached: { 1514 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1515 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1516 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1517 CGM.VoidPtrTy, CGM.SizeTy, 1518 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1519 llvm::FunctionType *FnTy = 1520 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1521 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1522 break; 1523 } 1524 case OMPRTL__kmpc_critical: { 1525 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1526 // kmp_critical_name *crit); 1527 llvm::Type *TypeParams[] = { 1528 getIdentTyPointerTy(), 
CGM.Int32Ty, 1529 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1530 llvm::FunctionType *FnTy = 1531 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1532 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1533 break; 1534 } 1535 case OMPRTL__kmpc_critical_with_hint: { 1536 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1537 // kmp_critical_name *crit, uintptr_t hint); 1538 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1539 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1540 CGM.IntPtrTy}; 1541 llvm::FunctionType *FnTy = 1542 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1543 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); 1544 break; 1545 } 1546 case OMPRTL__kmpc_threadprivate_register: { 1547 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1548 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1549 // typedef void *(*kmpc_ctor)(void *); 1550 auto KmpcCtorTy = 1551 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1552 /*isVarArg*/ false)->getPointerTo(); 1553 // typedef void *(*kmpc_cctor)(void *, void *); 1554 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1555 auto KmpcCopyCtorTy = 1556 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1557 /*isVarArg*/ false)->getPointerTo(); 1558 // typedef void (*kmpc_dtor)(void *); 1559 auto KmpcDtorTy = 1560 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1561 ->getPointerTo(); 1562 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1563 KmpcCopyCtorTy, KmpcDtorTy}; 1564 auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1565 /*isVarArg*/ false); 1566 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1567 break; 1568 } 1569 case OMPRTL__kmpc_end_critical: { 1570 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1571 // kmp_critical_name *crit); 
1572 llvm::Type *TypeParams[] = { 1573 getIdentTyPointerTy(), CGM.Int32Ty, 1574 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1575 llvm::FunctionType *FnTy = 1576 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1577 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1578 break; 1579 } 1580 case OMPRTL__kmpc_cancel_barrier: { 1581 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1582 // global_tid); 1583 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1584 llvm::FunctionType *FnTy = 1585 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1586 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1587 break; 1588 } 1589 case OMPRTL__kmpc_barrier: { 1590 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1591 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1592 llvm::FunctionType *FnTy = 1593 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1594 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1595 break; 1596 } 1597 case OMPRTL__kmpc_for_static_fini: { 1598 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1599 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1600 llvm::FunctionType *FnTy = 1601 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1602 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1603 break; 1604 } 1605 case OMPRTL__kmpc_push_num_threads: { 1606 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1607 // kmp_int32 num_threads) 1608 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1609 CGM.Int32Ty}; 1610 llvm::FunctionType *FnTy = 1611 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1612 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1613 break; 1614 } 1615 case OMPRTL__kmpc_serialized_parallel: { 1616 // Build void 
__kmpc_serialized_parallel(ident_t *loc, kmp_int32 1617 // global_tid); 1618 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1619 llvm::FunctionType *FnTy = 1620 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1621 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1622 break; 1623 } 1624 case OMPRTL__kmpc_end_serialized_parallel: { 1625 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1626 // global_tid); 1627 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1628 llvm::FunctionType *FnTy = 1629 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1630 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1631 break; 1632 } 1633 case OMPRTL__kmpc_flush: { 1634 // Build void __kmpc_flush(ident_t *loc); 1635 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1636 llvm::FunctionType *FnTy = 1637 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1638 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1639 break; 1640 } 1641 case OMPRTL__kmpc_master: { 1642 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1643 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1644 llvm::FunctionType *FnTy = 1645 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1646 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1647 break; 1648 } 1649 case OMPRTL__kmpc_end_master: { 1650 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1651 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1652 llvm::FunctionType *FnTy = 1653 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1654 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1655 break; 1656 } 1657 case OMPRTL__kmpc_omp_taskyield: { 1658 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1659 // int end_part); 1660 
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1661 llvm::FunctionType *FnTy = 1662 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1663 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1664 break; 1665 } 1666 case OMPRTL__kmpc_single: { 1667 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1668 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1669 llvm::FunctionType *FnTy = 1670 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1671 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1672 break; 1673 } 1674 case OMPRTL__kmpc_end_single: { 1675 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1676 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1677 llvm::FunctionType *FnTy = 1678 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1679 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1680 break; 1681 } 1682 case OMPRTL__kmpc_omp_task_alloc: { 1683 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1684 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1685 // kmp_routine_entry_t *task_entry); 1686 assert(KmpRoutineEntryPtrTy != nullptr && 1687 "Type kmp_routine_entry_t must be created."); 1688 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1689 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1690 // Return void * and then cast to particular kmp_task_t type. 
1691 llvm::FunctionType *FnTy = 1692 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1693 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1694 break; 1695 } 1696 case OMPRTL__kmpc_omp_task: { 1697 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1698 // *new_task); 1699 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1700 CGM.VoidPtrTy}; 1701 llvm::FunctionType *FnTy = 1702 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1703 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 1704 break; 1705 } 1706 case OMPRTL__kmpc_copyprivate: { 1707 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 1708 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 1709 // kmp_int32 didit); 1710 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1711 auto *CpyFnTy = 1712 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 1713 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 1714 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 1715 CGM.Int32Ty}; 1716 llvm::FunctionType *FnTy = 1717 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1718 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 1719 break; 1720 } 1721 case OMPRTL__kmpc_reduce: { 1722 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 1723 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 1724 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 1725 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1726 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1727 /*isVarArg=*/false); 1728 llvm::Type *TypeParams[] = { 1729 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1730 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1731 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1732 
llvm::FunctionType *FnTy = 1733 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1734 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 1735 break; 1736 } 1737 case OMPRTL__kmpc_reduce_nowait: { 1738 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 1739 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 1740 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 1741 // *lck); 1742 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1743 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1744 /*isVarArg=*/false); 1745 llvm::Type *TypeParams[] = { 1746 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1747 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1748 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1749 llvm::FunctionType *FnTy = 1750 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1751 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 1752 break; 1753 } 1754 case OMPRTL__kmpc_end_reduce: { 1755 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 1756 // kmp_critical_name *lck); 1757 llvm::Type *TypeParams[] = { 1758 getIdentTyPointerTy(), CGM.Int32Ty, 1759 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1760 llvm::FunctionType *FnTy = 1761 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1762 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 1763 break; 1764 } 1765 case OMPRTL__kmpc_end_reduce_nowait: { 1766 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 1767 // kmp_critical_name *lck); 1768 llvm::Type *TypeParams[] = { 1769 getIdentTyPointerTy(), CGM.Int32Ty, 1770 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1771 llvm::FunctionType *FnTy = 1772 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1773 RTLFn = 1774 CGM.CreateRuntimeFunction(FnTy, 
/*Name=*/"__kmpc_end_reduce_nowait"); 1775 break; 1776 } 1777 case OMPRTL__kmpc_omp_task_begin_if0: { 1778 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1779 // *new_task); 1780 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1781 CGM.VoidPtrTy}; 1782 llvm::FunctionType *FnTy = 1783 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1784 RTLFn = 1785 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 1786 break; 1787 } 1788 case OMPRTL__kmpc_omp_task_complete_if0: { 1789 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1790 // *new_task); 1791 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1792 CGM.VoidPtrTy}; 1793 llvm::FunctionType *FnTy = 1794 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1795 RTLFn = CGM.CreateRuntimeFunction(FnTy, 1796 /*Name=*/"__kmpc_omp_task_complete_if0"); 1797 break; 1798 } 1799 case OMPRTL__kmpc_ordered: { 1800 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 1801 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1802 llvm::FunctionType *FnTy = 1803 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1804 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 1805 break; 1806 } 1807 case OMPRTL__kmpc_end_ordered: { 1808 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 1809 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1810 llvm::FunctionType *FnTy = 1811 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1812 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 1813 break; 1814 } 1815 case OMPRTL__kmpc_omp_taskwait: { 1816 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 1817 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1818 llvm::FunctionType *FnTy = 1819 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1820 RTLFn = 
CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 1821 break; 1822 } 1823 case OMPRTL__kmpc_taskgroup: { 1824 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 1825 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1826 llvm::FunctionType *FnTy = 1827 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1828 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 1829 break; 1830 } 1831 case OMPRTL__kmpc_end_taskgroup: { 1832 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 1833 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1834 llvm::FunctionType *FnTy = 1835 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1836 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 1837 break; 1838 } 1839 case OMPRTL__kmpc_push_proc_bind: { 1840 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 1841 // int proc_bind) 1842 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1843 llvm::FunctionType *FnTy = 1844 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1845 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 1846 break; 1847 } 1848 case OMPRTL__kmpc_omp_task_with_deps: { 1849 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 1850 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 1851 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 1852 llvm::Type *TypeParams[] = { 1853 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 1854 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 1855 llvm::FunctionType *FnTy = 1856 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1857 RTLFn = 1858 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 1859 break; 1860 } 1861 case OMPRTL__kmpc_omp_wait_deps: { 1862 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 1863 // 
kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 1864 // kmp_depend_info_t *noalias_dep_list); 1865 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1866 CGM.Int32Ty, CGM.VoidPtrTy, 1867 CGM.Int32Ty, CGM.VoidPtrTy}; 1868 llvm::FunctionType *FnTy = 1869 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1870 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 1871 break; 1872 } 1873 case OMPRTL__kmpc_cancellationpoint: { 1874 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 1875 // global_tid, kmp_int32 cncl_kind) 1876 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1877 llvm::FunctionType *FnTy = 1878 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1879 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 1880 break; 1881 } 1882 case OMPRTL__kmpc_cancel: { 1883 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 1884 // kmp_int32 cncl_kind) 1885 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1886 llvm::FunctionType *FnTy = 1887 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1888 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 1889 break; 1890 } 1891 case OMPRTL__kmpc_push_num_teams: { 1892 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 1893 // kmp_int32 num_teams, kmp_int32 num_threads) 1894 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1895 CGM.Int32Ty}; 1896 llvm::FunctionType *FnTy = 1897 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1898 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 1899 break; 1900 } 1901 case OMPRTL__kmpc_fork_teams: { 1902 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 1903 // microtask, ...); 1904 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1905 
getKmpc_MicroPointerTy()}; 1906 llvm::FunctionType *FnTy = 1907 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1908 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 1909 break; 1910 } 1911 case OMPRTL__kmpc_taskloop: { 1912 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 1913 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 1914 // sched, kmp_uint64 grainsize, void *task_dup); 1915 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 1916 CGM.IntTy, 1917 CGM.VoidPtrTy, 1918 CGM.IntTy, 1919 CGM.Int64Ty->getPointerTo(), 1920 CGM.Int64Ty->getPointerTo(), 1921 CGM.Int64Ty, 1922 CGM.IntTy, 1923 CGM.IntTy, 1924 CGM.Int64Ty, 1925 CGM.VoidPtrTy}; 1926 llvm::FunctionType *FnTy = 1927 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1928 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 1929 break; 1930 } 1931 case OMPRTL__kmpc_doacross_init: { 1932 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 1933 // num_dims, struct kmp_dim *dims); 1934 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 1935 CGM.Int32Ty, 1936 CGM.Int32Ty, 1937 CGM.VoidPtrTy}; 1938 llvm::FunctionType *FnTy = 1939 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1940 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 1941 break; 1942 } 1943 case OMPRTL__kmpc_doacross_fini: { 1944 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 1945 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1946 llvm::FunctionType *FnTy = 1947 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1948 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 1949 break; 1950 } 1951 case OMPRTL__kmpc_doacross_post: { 1952 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 1953 // *vec); 1954 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 
1955 CGM.Int64Ty->getPointerTo()}; 1956 llvm::FunctionType *FnTy = 1957 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1958 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 1959 break; 1960 } 1961 case OMPRTL__kmpc_doacross_wait: { 1962 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 1963 // *vec); 1964 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1965 CGM.Int64Ty->getPointerTo()}; 1966 llvm::FunctionType *FnTy = 1967 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1968 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 1969 break; 1970 } 1971 case OMPRTL__kmpc_task_reduction_init: { 1972 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void 1973 // *data); 1974 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 1975 llvm::FunctionType *FnTy = 1976 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1977 RTLFn = 1978 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); 1979 break; 1980 } 1981 case OMPRTL__kmpc_task_reduction_get_th_data: { 1982 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 1983 // *d); 1984 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 1985 llvm::FunctionType *FnTy = 1986 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1987 RTLFn = CGM.CreateRuntimeFunction( 1988 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 1989 break; 1990 } 1991 case OMPRTL__tgt_target: { 1992 // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t 1993 // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t 1994 // *arg_types); 1995 llvm::Type *TypeParams[] = {CGM.Int32Ty, 1996 CGM.VoidPtrTy, 1997 CGM.Int32Ty, 1998 CGM.VoidPtrPtrTy, 1999 CGM.VoidPtrPtrTy, 2000 CGM.SizeTy->getPointerTo(), 2001 CGM.Int32Ty->getPointerTo()}; 2002 llvm::FunctionType *FnTy = 2003 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2004 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2005 break; 2006 } 2007 case OMPRTL__tgt_target_teams: { 2008 // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, 2009 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 2010 // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); 2011 llvm::Type *TypeParams[] = {CGM.Int32Ty, 2012 CGM.VoidPtrTy, 2013 CGM.Int32Ty, 2014 CGM.VoidPtrPtrTy, 2015 CGM.VoidPtrPtrTy, 2016 CGM.SizeTy->getPointerTo(), 2017 CGM.Int32Ty->getPointerTo(), 2018 CGM.Int32Ty, 2019 CGM.Int32Ty}; 2020 llvm::FunctionType *FnTy = 2021 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2022 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2023 break; 2024 } 2025 case OMPRTL__tgt_register_lib: { 2026 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 2027 QualType ParamTy = 2028 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2029 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2030 llvm::FunctionType *FnTy = 2031 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2032 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 2033 break; 2034 } 2035 case OMPRTL__tgt_unregister_lib: { 2036 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 2037 QualType ParamTy = 2038 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2039 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2040 llvm::FunctionType *FnTy = 2041 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2042 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 2043 break; 2044 } 2045 case OMPRTL__tgt_target_data_begin: { 2046 // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, 2047 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 2048 llvm::Type *TypeParams[] = 
{CGM.Int32Ty, 2049 CGM.Int32Ty, 2050 CGM.VoidPtrPtrTy, 2051 CGM.VoidPtrPtrTy, 2052 CGM.SizeTy->getPointerTo(), 2053 CGM.Int32Ty->getPointerTo()}; 2054 llvm::FunctionType *FnTy = 2055 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2056 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2057 break; 2058 } 2059 case OMPRTL__tgt_target_data_end: { 2060 // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num, 2061 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 2062 llvm::Type *TypeParams[] = {CGM.Int32Ty, 2063 CGM.Int32Ty, 2064 CGM.VoidPtrPtrTy, 2065 CGM.VoidPtrPtrTy, 2066 CGM.SizeTy->getPointerTo(), 2067 CGM.Int32Ty->getPointerTo()}; 2068 llvm::FunctionType *FnTy = 2069 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2070 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2071 break; 2072 } 2073 case OMPRTL__tgt_target_data_update: { 2074 // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num, 2075 // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); 2076 llvm::Type *TypeParams[] = {CGM.Int32Ty, 2077 CGM.Int32Ty, 2078 CGM.VoidPtrPtrTy, 2079 CGM.VoidPtrPtrTy, 2080 CGM.SizeTy->getPointerTo(), 2081 CGM.Int32Ty->getPointerTo()}; 2082 llvm::FunctionType *FnTy = 2083 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2084 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2085 break; 2086 } 2087 } 2088 assert(RTLFn && "Unable to find OpenMP runtime function"); 2089 return RTLFn; 2090 } 2091 2092 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, 2093 bool IVSigned) { 2094 assert((IVSize == 32 || IVSize == 64) && 2095 "IV size is not compatible with the omp runtime"); 2096 auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2097 : "__kmpc_for_static_init_4u") 2098 : (IVSigned ? 
"__kmpc_for_static_init_8" 2099 : "__kmpc_for_static_init_8u"); 2100 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2101 auto PtrTy = llvm::PointerType::getUnqual(ITy); 2102 llvm::Type *TypeParams[] = { 2103 getIdentTyPointerTy(), // loc 2104 CGM.Int32Ty, // tid 2105 CGM.Int32Ty, // schedtype 2106 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2107 PtrTy, // p_lower 2108 PtrTy, // p_upper 2109 PtrTy, // p_stride 2110 ITy, // incr 2111 ITy // chunk 2112 }; 2113 llvm::FunctionType *FnTy = 2114 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2115 return CGM.CreateRuntimeFunction(FnTy, Name); 2116 } 2117 2118 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, 2119 bool IVSigned) { 2120 assert((IVSize == 32 || IVSize == 64) && 2121 "IV size is not compatible with the omp runtime"); 2122 auto Name = 2123 IVSize == 32 2124 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2125 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2126 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2127 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2128 CGM.Int32Ty, // tid 2129 CGM.Int32Ty, // schedtype 2130 ITy, // lower 2131 ITy, // upper 2132 ITy, // stride 2133 ITy // chunk 2134 }; 2135 llvm::FunctionType *FnTy = 2136 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2137 return CGM.CreateRuntimeFunction(FnTy, Name); 2138 } 2139 2140 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, 2141 bool IVSigned) { 2142 assert((IVSize == 32 || IVSize == 64) && 2143 "IV size is not compatible with the omp runtime"); 2144 auto Name = 2145 IVSize == 32 2146 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2147 : (IVSigned ? 
"__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2148 llvm::Type *TypeParams[] = { 2149 getIdentTyPointerTy(), // loc 2150 CGM.Int32Ty, // tid 2151 }; 2152 llvm::FunctionType *FnTy = 2153 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2154 return CGM.CreateRuntimeFunction(FnTy, Name); 2155 } 2156 2157 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, 2158 bool IVSigned) { 2159 assert((IVSize == 32 || IVSize == 64) && 2160 "IV size is not compatible with the omp runtime"); 2161 auto Name = 2162 IVSize == 32 2163 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2164 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2165 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2166 auto PtrTy = llvm::PointerType::getUnqual(ITy); 2167 llvm::Type *TypeParams[] = { 2168 getIdentTyPointerTy(), // loc 2169 CGM.Int32Ty, // tid 2170 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2171 PtrTy, // p_lower 2172 PtrTy, // p_upper 2173 PtrTy // p_stride 2174 }; 2175 llvm::FunctionType *FnTy = 2176 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2177 return CGM.CreateRuntimeFunction(FnTy, Name); 2178 } 2179 2180 llvm::Constant * 2181 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2182 assert(!CGM.getLangOpts().OpenMPUseTLS || 2183 !CGM.getContext().getTargetInfo().isTLSSupported()); 2184 // Lookup the entry, lazily creating it if necessary. 
2185 return getOrCreateInternalVariable(CGM.Int8PtrPtrTy, 2186 Twine(CGM.getMangledName(VD)) + ".cache."); 2187 } 2188 2189 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2190 const VarDecl *VD, 2191 Address VDAddr, 2192 SourceLocation Loc) { 2193 if (CGM.getLangOpts().OpenMPUseTLS && 2194 CGM.getContext().getTargetInfo().isTLSSupported()) 2195 return VDAddr; 2196 2197 auto VarTy = VDAddr.getElementType(); 2198 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2199 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2200 CGM.Int8PtrTy), 2201 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2202 getOrCreateThreadPrivateCache(VD)}; 2203 return Address(CGF.EmitRuntimeCall( 2204 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2205 VDAddr.getAlignment()); 2206 } 2207 2208 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2209 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2210 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2211 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2212 // library. 2213 auto OMPLoc = emitUpdateLocation(CGF, Loc); 2214 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2215 OMPLoc); 2216 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2217 // to register constructor/destructor for variable. 
llvm::Value *Args[] = {OMPLoc,
                         // The variable's address is passed as an untyped
                         // pointer.
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.VoidPtrTy),
                         Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}

/// \brief Emits the helper functions needed to construct and destroy the
/// threadprivate copies of \a VD and registers them with the runtime.
///
/// Nothing is emitted (and nullptr is returned) when OpenMPUseTLS is set and
/// the target supports TLS, when \a VD has no definition, when the definition
/// was already handled (tracked in ThreadPrivateWithDefinition), or when
/// neither a constructor nor a destructor helper is required.
///
/// \param VD Threadprivate variable; replaced by its definition internally.
/// \param VDAddr Address of the original variable.
/// \param Loc Location used for the generated helpers and runtime calls.
/// \param PerformInit If true (and compiling C++), a helper that re-runs the
/// variable's initializer on a thread's copy is generated.
/// \param CGF If non-null, the registration call is emitted into this
/// function and nullptr is returned; if null, a new
/// ".__omp_threadprivate_init_." function containing the registration is
/// created and returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With native TLS no runtime registration is performed here.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Handle each definition at most once.
  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
    ThreadPrivateWithDefinition.insert(VD);
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    auto Init = VD->getAnyInitializer();
    // NOTE(review): Init is dereferenced below without a null check, so this
    // assumes VD has an initializer whenever PerformInit is true - confirm
    // against callers.
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single 'void *' parameter: the destination object to initialize.
      ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The helper has the shape 'void *(void *)'.
      auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_ctor_.", FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, SourceLocation());
      auto ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      // View the incoming pointer as the variable's type, with the alignment
      // of the original variable.
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
                                             CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the parameter and store it as the helper's return value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single 'void *' parameter: the object to destroy.
      ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The helper has the shape 'void (void *)'.
      auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_dtor_.", FI, Loc);
      // NL stays alive until the end of this block, covering StartFunction.
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            SourceLocation());
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      auto ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    // Pointer type of a 'void *(void *, void *)' function, used for the
    // copy-constructor argument.
    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto CopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                /*isVarArg=*/false)->getPointerTo();
    // Copying constructor for the threadprivate variable.
2308 // Must be NULL - reserved by runtime, but currently it requires that this 2309 // parameter is always NULL. Otherwise it fires assertion. 2310 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 2311 if (Ctor == nullptr) { 2312 auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 2313 /*isVarArg=*/false)->getPointerTo(); 2314 Ctor = llvm::Constant::getNullValue(CtorTy); 2315 } 2316 if (Dtor == nullptr) { 2317 auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 2318 /*isVarArg=*/false)->getPointerTo(); 2319 Dtor = llvm::Constant::getNullValue(DtorTy); 2320 } 2321 if (!CGF) { 2322 auto InitFunctionTy = 2323 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 2324 auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( 2325 InitFunctionTy, ".__omp_threadprivate_init_.", 2326 CGM.getTypes().arrangeNullaryFunction()); 2327 CodeGenFunction InitCGF(CGM); 2328 FunctionArgList ArgList; 2329 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 2330 CGM.getTypes().arrangeNullaryFunction(), ArgList, 2331 Loc); 2332 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2333 InitCGF.FinishFunction(); 2334 return InitFunction; 2335 } 2336 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2337 } 2338 return nullptr; 2339 } 2340 2341 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2342 QualType VarType, 2343 StringRef Name) { 2344 llvm::Twine VarName(Name, ".artificial."); 2345 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2346 llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName); 2347 llvm::Value *Args[] = { 2348 emitUpdateLocation(CGF, SourceLocation()), 2349 getThreadID(CGF, SourceLocation()), 2350 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2351 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2352 /*IsSigned=*/false), 2353 getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + 
".cache.")};
  // Cast the 'void *' returned by the runtime back to a pointer to the
  // variable's memory type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getPointerAlign());
}

/// \brief Emits code for OpenMP 'if' clause using the specified \a ThenGen and
/// \a ElseGen code generators. Here is the logic:
/// if (Cond) {
///   ThenGen();
/// } else {
///   ElseGen();
/// }
/// If \a Cond folds to a constant, only the selected generator is run and no
/// branch is emitted.
void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                      const RegionCodeGenTy &ThenGen,
                                      const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
  // emit the conditional branch.
  auto ThenBlock = CGF.createBasicBlock("omp_if.then");
  auto ElseBlock = CGF.createBasicBlock("omp_if.else");
  auto ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code (always generated here).
  // There is no need to emit line number for unconditional branch.
  // NOTE(review): the object created below is a discarded temporary; if
  // ApplyDebugLocation restores the previous location in its destructor, this
  // statement has no lasting effect -- confirm whether a scoped object was
  // intended.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  // NOTE(review): same discarded-temporary pattern as above.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// Emits the call that runs \a OutlinedFn as an OpenMP parallel region.
///
/// Without \a IfCond the region is always started through __kmpc_fork_call.
/// With \a IfCond, emitOMPIfClause selects between the forked form and a
/// serialized form that calls \a OutlinedFn directly between
/// __kmpc_serialized_parallel and __kmpc_end_serialized_parallel.
///
/// \param CGF Function in which the code is emitted; nothing is emitted when
/// it has no insertion point.
/// \param Loc Source location used for the runtime location argument.
/// \param OutlinedFn Outlined function implementing the region body.
/// \param CapturedVars Values appended to the outlined function's arguments.
/// \param IfCond Condition of the 'if' clause, or null if there is none.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Value *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  auto *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    auto &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    // The fixed arguments are followed by the captured variables themselves.
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    auto &RT = CGF.CGM.getOpenMPRuntime();
    auto ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero, <captured vars>);
    auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    // The second argument points at a 4-byte-aligned i32 temporary holding 0.
    Address ZeroAddr =
        CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
                             /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(),
CapturedVars.end()); 2450 CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); 2451 2452 // __kmpc_end_serialized_parallel(&Loc, GTid); 2453 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2454 CGF.EmitRuntimeCall( 2455 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 2456 EndArgs); 2457 }; 2458 if (IfCond) 2459 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 2460 else { 2461 RegionCodeGenTy ThenRCG(ThenGen); 2462 ThenRCG(CGF); 2463 } 2464 } 2465 2466 // If we're inside an (outlined) parallel region, use the region info's 2467 // thread-ID variable (it is passed in a first argument of the outlined function 2468 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2469 // regular serial code region, get thread ID by calling kmp_int32 2470 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2471 // return the address of that temp. 2472 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2473 SourceLocation Loc) { 2474 if (auto *OMPRegionInfo = 2475 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2476 if (OMPRegionInfo->getThreadIDVariable()) 2477 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 2478 2479 auto ThreadID = getThreadID(CGF, Loc); 2480 auto Int32Ty = 2481 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2482 auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2483 CGF.EmitStoreOfScalar(ThreadID, 2484 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2485 2486 return ThreadIDTemp; 2487 } 2488 2489 llvm::Constant * 2490 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, 2491 const llvm::Twine &Name) { 2492 SmallString<256> Buffer; 2493 llvm::raw_svector_ostream Out(Buffer); 2494 Out << Name; 2495 auto RuntimeName = Out.str(); 2496 auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first; 2497 if (Elem.second) { 2498 
assert(Elem.second->getType()->getPointerElementType() == Ty && 2499 "OMP internal variable has different type than requested"); 2500 return &*Elem.second; 2501 } 2502 2503 return Elem.second = new llvm::GlobalVariable( 2504 CGM.getModule(), Ty, /*IsConstant*/ false, 2505 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2506 Elem.first()); 2507 } 2508 2509 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2510 llvm::Twine Name(".gomp_critical_user_", CriticalName); 2511 return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); 2512 } 2513 2514 namespace { 2515 /// Common pre(post)-action for different OpenMP constructs. 2516 class CommonActionTy final : public PrePostActionTy { 2517 llvm::Value *EnterCallee; 2518 ArrayRef<llvm::Value *> EnterArgs; 2519 llvm::Value *ExitCallee; 2520 ArrayRef<llvm::Value *> ExitArgs; 2521 bool Conditional; 2522 llvm::BasicBlock *ContBlock = nullptr; 2523 2524 public: 2525 CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs, 2526 llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs, 2527 bool Conditional = false) 2528 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2529 ExitArgs(ExitArgs), Conditional(Conditional) {} 2530 void Enter(CodeGenFunction &CGF) override { 2531 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2532 if (Conditional) { 2533 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2534 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2535 ContBlock = CGF.createBasicBlock("omp_if.end"); 2536 // Generate the branch (If-stmt) 2537 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2538 CGF.EmitBlock(ThenBlock); 2539 } 2540 } 2541 void Done(CodeGenFunction &CGF) { 2542 // Emit the rest of blocks/branches 2543 CGF.EmitBranch(ContBlock); 2544 CGF.EmitBlock(ContBlock, true); 2545 } 2546 void Exit(CodeGenFunction &CGF) override { 2547 CGF.EmitRuntimeCall(ExitCallee, 
ExitArgs); 2548 } 2549 }; 2550 } // anonymous namespace 2551 2552 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2553 StringRef CriticalName, 2554 const RegionCodeGenTy &CriticalOpGen, 2555 SourceLocation Loc, const Expr *Hint) { 2556 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2557 // CriticalOpGen(); 2558 // __kmpc_end_critical(ident_t *, gtid, Lock); 2559 // Prepare arguments and build a call to __kmpc_critical 2560 if (!CGF.HaveInsertPoint()) 2561 return; 2562 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2563 getCriticalRegionLock(CriticalName)}; 2564 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2565 std::end(Args)); 2566 if (Hint) { 2567 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2568 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); 2569 } 2570 CommonActionTy Action( 2571 createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint 2572 : OMPRTL__kmpc_critical), 2573 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); 2574 CriticalOpGen.setAction(Action); 2575 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2576 } 2577 2578 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2579 const RegionCodeGenTy &MasterOpGen, 2580 SourceLocation Loc) { 2581 if (!CGF.HaveInsertPoint()) 2582 return; 2583 // if(__kmpc_master(ident_t *, gtid)) { 2584 // MasterOpGen(); 2585 // __kmpc_end_master(ident_t *, gtid); 2586 // } 2587 // Prepare arguments and build a call to __kmpc_master 2588 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2589 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, 2590 createRuntimeFunction(OMPRTL__kmpc_end_master), Args, 2591 /*Conditional=*/true); 2592 MasterOpGen.setAction(Action); 2593 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2594 Action.Done(CGF); 2595 } 2596 2597 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2598 SourceLocation Loc) { 
2599 if (!CGF.HaveInsertPoint()) 2600 return; 2601 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2602 llvm::Value *Args[] = { 2603 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2604 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2605 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 2606 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2607 Region->emitUntiedSwitch(CGF); 2608 } 2609 2610 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2611 const RegionCodeGenTy &TaskgroupOpGen, 2612 SourceLocation Loc) { 2613 if (!CGF.HaveInsertPoint()) 2614 return; 2615 // __kmpc_taskgroup(ident_t *, gtid); 2616 // TaskgroupOpGen(); 2617 // __kmpc_end_taskgroup(ident_t *, gtid); 2618 // Prepare arguments and build a call to __kmpc_taskgroup 2619 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2620 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, 2621 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 2622 Args); 2623 TaskgroupOpGen.setAction(Action); 2624 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2625 } 2626 2627 /// Given an array of pointers to variables, project the address of a 2628 /// given variable. 2629 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2630 unsigned Index, const VarDecl *Var) { 2631 // Pull out the pointer to the variable. 
Address PtrAddr =
      CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  // Give the loaded pointer the variable's declared alignment and memory type.
  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Builds the internal function ".omp.copyprivate.copy_func" with signature
///   void copy_func(void *LHSArg, void *RHSArg);
/// Both arguments are cast to \a ArgsType and treated as arrays of pointers to
/// variables; for each entry of \a AssignmentOps the corresponding source
/// variable is copied to the destination variable via EmitOMPCopy.
///
/// \param CopyprivateVars Variable references whose types drive the copies.
/// \param DestExprs References to the destination variables (from LHSArg).
/// \param SrcExprs References to the source variables (from RHSArg).
/// \param AssignmentOps Per-variable assignment operations.
/// \return The generated function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
  auto &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *Fn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
      ".omp.copyprivate.copy_func", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
// *(Type<n-1>*)Dst[n-1] = *(Type<n-1>*)Src[n-1];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // The copy uses the type of the original copyprivate variable.
    auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emits an OpenMP 'single' region around \a SingleOpGen, followed by a
/// __kmpc_copyprivate call when \a CopyprivateVars is non-empty.
///
/// \a CopyprivateVars, \a SrcExprs, \a DstExprs and \a AssignmentOps must all
/// have the same number of elements. Nothing is emitted when \a CGF has no
/// insertion point.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  auto &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid when there is nothing to broadcast; its validity is
  // what later enables the copyprivate code.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy
Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // NOTE(review): this store is emitted before Action.Done(), so presumably
    // it still lands inside the conditional block executed only when
    // __kmpc_single returned non-zero -- confirm.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional block opened by the action.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    // The list is a constant-size array of 'void *', one slot per variable.
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    auto CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      // Slot I receives the address of the I-th copyprivate variable.
      Address Elem = CGF.Builder.CreateConstArrayGEP(
          CopyprivateList, I, CGF.getPointerSize());
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
2748 auto *CpyFn = emitCopyprivateCopyFunction( 2749 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 2750 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps); 2751 auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2752 Address CL = 2753 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 2754 CGF.VoidPtrTy); 2755 auto *DidItVal = CGF.Builder.CreateLoad(DidIt); 2756 llvm::Value *Args[] = { 2757 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2758 getThreadID(CGF, Loc), // i32 <gtid> 2759 BufSize, // size_t <buf_size> 2760 CL.getPointer(), // void *<copyprivate list> 2761 CpyFn, // void (*) (void *, void *) <copy_func> 2762 DidItVal // i32 did_it 2763 }; 2764 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 2765 } 2766 } 2767 2768 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2769 const RegionCodeGenTy &OrderedOpGen, 2770 SourceLocation Loc, bool IsThreads) { 2771 if (!CGF.HaveInsertPoint()) 2772 return; 2773 // __kmpc_ordered(ident_t *, gtid); 2774 // OrderedOpGen(); 2775 // __kmpc_end_ordered(ident_t *, gtid); 2776 // Prepare arguments and build a call to __kmpc_ordered 2777 if (IsThreads) { 2778 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2779 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, 2780 createRuntimeFunction(OMPRTL__kmpc_end_ordered), 2781 Args); 2782 OrderedOpGen.setAction(Action); 2783 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2784 return; 2785 } 2786 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2787 } 2788 2789 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2790 OpenMPDirectiveKind Kind, bool EmitChecks, 2791 bool ForceSimpleCall) { 2792 if (!CGF.HaveInsertPoint()) 2793 return; 2794 // Build call __kmpc_cancel_barrier(loc, thread_id); 2795 // Build call __kmpc_barrier(loc, thread_id); 2796 unsigned Flags; 2797 if (Kind == OMPD_for) 2798 Flags = 
OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Cancellable region: use the cancel-aware barrier unless the caller
    // forces the simple one.
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      auto *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
        auto *ContBB = CGF.createBasicBlock(".cancel.continue");
        auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        auto CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      // The cancel barrier replaces the plain barrier below.
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}

/// \brief Map the OpenMP loop schedule to the runtime enumeration.
///
/// \param ScheduleKind Schedule kind from the 'schedule' clause.
/// \param Chunked True if a chunk size was specified.
/// \param Ordered True to select the ordered ('ord') variant of the schedule.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ?
OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2846 case OMPC_SCHEDULE_guided: 2847 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2848 case OMPC_SCHEDULE_runtime: 2849 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2850 case OMPC_SCHEDULE_auto: 2851 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2852 case OMPC_SCHEDULE_unknown: 2853 assert(!Chunked && "chunk was specified but schedule kind not known"); 2854 return Ordered ? OMP_ord_static : OMP_sch_static; 2855 } 2856 llvm_unreachable("Unexpected runtime schedule"); 2857 } 2858 2859 /// \brief Map the OpenMP distribute schedule to the runtime enumeration. 2860 static OpenMPSchedType 2861 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2862 // only static is allowed for dist_schedule 2863 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2864 } 2865 2866 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2867 bool Chunked) const { 2868 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2869 return Schedule == OMP_sch_static; 2870 } 2871 2872 bool CGOpenMPRuntime::isStaticNonchunked( 2873 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2874 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2875 return Schedule == OMP_dist_sch_static; 2876 } 2877 2878 2879 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2880 auto Schedule = 2881 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2882 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2883 return Schedule != OMP_sch_static; 2884 } 2885 2886 static int addMonoNonMonoModifier(OpenMPSchedType Schedule, 2887 OpenMPScheduleClauseModifier M1, 2888 OpenMPScheduleClauseModifier M2) { 2889 int Modifier = 0; 2890 switch (M1) { 2891 case OMPC_SCHEDULE_MODIFIER_monotonic: 2892 Modifier = OMP_sch_modifier_monotonic; 2893 break; 2894 case 
OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    // 'simd' only affects the chunked static schedule.
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // The second modifier is handled the same way; a monotonic/nonmonotonic
  // value here overwrites the one taken from the first modifier.
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // The modifier is OR-ed into the schedule value.
  return Schedule | Modifier;
}

/// Emits the __kmpc_dispatch_init call that starts a dispatched loop.
///
/// \param ScheduleKind Schedule and its two modifiers from the clause.
/// \param IVSize Bit width of the iteration variable; selects the runtime
/// entry point together with \a IVSigned and sizes the stride/default chunk.
/// \param Ordered True if the loop has an 'ordered' clause.
/// \param DispatchValues Lower bound, upper bound and optional chunk.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Unordered loops must not reach this path with a static schedule.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ?
DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                // Lower
      DispatchValues.UB,                                // Upper
      CGF.Builder.getIntN(IVSize, 1),                   // Stride
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// Emits the call to \a ForStaticInitFunction that initializes a statically
/// scheduled loop.
///
/// \param UpdateLocation Location argument for the runtime call.
/// \param ThreadId Thread id argument for the runtime call.
/// \param Schedule Runtime schedule; must be one of the static schedules.
/// \param M1 First schedule modifier.
/// \param M2 Second schedule modifier.
/// \param IVSize Bit width of the increment and default chunk constants.
/// \param Ordered Must be false.
/// \param IL Address passed as the is-last-iteration output.
/// \param LB Address passed as the lower-bound in/out.
/// \param UB Address passed as the upper-bound in/out.
/// \param ST Address passed as the stride output.
/// \param Chunk Chunk size, or null for a non-chunked schedule (1 is passed).
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB,
    Address ST, llvm::Value *Chunk) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
2981 Chunk = CGF.Builder.getIntN(IVSize, 1); 2982 } else { 2983 assert((Schedule == OMP_sch_static_chunked || 2984 Schedule == OMP_sch_static_balanced_chunked || 2985 Schedule == OMP_ord_static_chunked || 2986 Schedule == OMP_dist_sch_static_chunked) && 2987 "expected static chunked schedule"); 2988 } 2989 llvm::Value *Args[] = { 2990 UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier( 2991 Schedule, M1, M2)), // Schedule type 2992 IL.getPointer(), // &isLastIter 2993 LB.getPointer(), // &LB 2994 UB.getPointer(), // &UB 2995 ST.getPointer(), // &Stride 2996 CGF.Builder.getIntN(IVSize, 1), // Incr 2997 Chunk // Chunk 2998 }; 2999 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 3000 } 3001 3002 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 3003 SourceLocation Loc, 3004 const OpenMPScheduleTy &ScheduleKind, 3005 unsigned IVSize, bool IVSigned, 3006 bool Ordered, Address IL, Address LB, 3007 Address UB, Address ST, 3008 llvm::Value *Chunk) { 3009 OpenMPSchedType ScheduleNum = 3010 getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); 3011 auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); 3012 auto *ThreadId = getThreadID(CGF, Loc); 3013 auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); 3014 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3015 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize, 3016 Ordered, IL, LB, UB, ST, Chunk); 3017 } 3018 3019 void CGOpenMPRuntime::emitDistributeStaticInit( 3020 CodeGenFunction &CGF, SourceLocation Loc, 3021 OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned, 3022 bool Ordered, Address IL, Address LB, Address UB, Address ST, 3023 llvm::Value *Chunk) { 3024 OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr); 3025 auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); 3026 auto *ThreadId = getThreadID(CGF, Loc); 3027 auto *StaticInitFunction = createForStaticInitFunction(IVSize, 
IVSigned); 3028 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3029 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 3030 OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB, 3031 UB, ST, Chunk); 3032 } 3033 3034 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 3035 SourceLocation Loc) { 3036 if (!CGF.HaveInsertPoint()) 3037 return; 3038 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 3039 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3040 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 3041 Args); 3042 } 3043 3044 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 3045 SourceLocation Loc, 3046 unsigned IVSize, 3047 bool IVSigned) { 3048 if (!CGF.HaveInsertPoint()) 3049 return; 3050 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 3051 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3052 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 3053 } 3054 3055 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 3056 SourceLocation Loc, unsigned IVSize, 3057 bool IVSigned, Address IL, 3058 Address LB, Address UB, 3059 Address ST) { 3060 // Call __kmpc_dispatch_next( 3061 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 3062 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 3063 // kmp_int[32|64] *p_stride); 3064 llvm::Value *Args[] = { 3065 emitUpdateLocation(CGF, Loc), 3066 getThreadID(CGF, Loc), 3067 IL.getPointer(), // &isLastIter 3068 LB.getPointer(), // &Lower 3069 UB.getPointer(), // &Upper 3070 ST.getPointer() // &Stride 3071 }; 3072 llvm::Value *Call = 3073 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 3074 return CGF.EmitScalarConversion( 3075 Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true), 3076 CGF.getContext().BoolTy, Loc); 3077 } 3078 3079 void 
CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 3080 llvm::Value *NumThreads, 3081 SourceLocation Loc) { 3082 if (!CGF.HaveInsertPoint()) 3083 return; 3084 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 3085 llvm::Value *Args[] = { 3086 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3087 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 3088 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 3089 Args); 3090 } 3091 3092 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 3093 OpenMPProcBindClauseKind ProcBind, 3094 SourceLocation Loc) { 3095 if (!CGF.HaveInsertPoint()) 3096 return; 3097 // Constants for proc bind value accepted by the runtime. 3098 enum ProcBindTy { 3099 ProcBindFalse = 0, 3100 ProcBindTrue, 3101 ProcBindMaster, 3102 ProcBindClose, 3103 ProcBindSpread, 3104 ProcBindIntel, 3105 ProcBindDefault 3106 } RuntimeProcBind; 3107 switch (ProcBind) { 3108 case OMPC_PROC_BIND_master: 3109 RuntimeProcBind = ProcBindMaster; 3110 break; 3111 case OMPC_PROC_BIND_close: 3112 RuntimeProcBind = ProcBindClose; 3113 break; 3114 case OMPC_PROC_BIND_spread: 3115 RuntimeProcBind = ProcBindSpread; 3116 break; 3117 case OMPC_PROC_BIND_unknown: 3118 llvm_unreachable("Unsupported proc_bind value."); 3119 } 3120 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 3121 llvm::Value *Args[] = { 3122 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3123 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 3124 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 3125 } 3126 3127 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 3128 SourceLocation Loc) { 3129 if (!CGF.HaveInsertPoint()) 3130 return; 3131 // Build call void __kmpc_flush(ident_t *loc) 3132 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 3133 emitUpdateLocation(CGF, Loc)); 3134 } 3135 3136 namespace { 3137 /// \brief 
Indexes of fields for type kmp_task_t. 3138 enum KmpTaskTFields { 3139 /// \brief List of shared variables. 3140 KmpTaskTShareds, 3141 /// \brief Task routine. 3142 KmpTaskTRoutine, 3143 /// \brief Partition id for the untied tasks. 3144 KmpTaskTPartId, 3145 /// Function with call of destructors for private variables. 3146 Data1, 3147 /// Task priority. 3148 Data2, 3149 /// (Taskloops only) Lower bound. 3150 KmpTaskTLowerBound, 3151 /// (Taskloops only) Upper bound. 3152 KmpTaskTUpperBound, 3153 /// (Taskloops only) Stride. 3154 KmpTaskTStride, 3155 /// (Taskloops only) Is last iteration flag. 3156 KmpTaskTLastIter, 3157 /// (Taskloops only) Reduction data. 3158 KmpTaskTReductions, 3159 }; 3160 } // anonymous namespace 3161 3162 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 3163 // FIXME: Add other entries type when they become supported. 3164 return OffloadEntriesTargetRegion.empty(); 3165 } 3166 3167 /// \brief Initialize target region entry. 3168 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3169 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3170 StringRef ParentName, unsigned LineNum, 3171 unsigned Order) { 3172 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3173 "only required for the device " 3174 "code generation."); 3175 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3176 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3177 /*Flags=*/0); 3178 ++OffloadingEntriesNum; 3179 } 3180 3181 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3182 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3183 StringRef ParentName, unsigned LineNum, 3184 llvm::Constant *Addr, llvm::Constant *ID, 3185 int32_t Flags) { 3186 // If we are emitting code for a target, the entry is already initialized, 3187 // only has to be registered. 
3188 if (CGM.getLangOpts().OpenMPIsDevice) { 3189 assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 3190 "Entry must exist."); 3191 auto &Entry = 3192 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3193 assert(Entry.isValid() && "Entry not initialized!"); 3194 Entry.setAddress(Addr); 3195 Entry.setID(ID); 3196 Entry.setFlags(Flags); 3197 return; 3198 } else { 3199 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID, Flags); 3200 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3201 } 3202 } 3203 3204 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3205 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3206 unsigned LineNum) const { 3207 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3208 if (PerDevice == OffloadEntriesTargetRegion.end()) 3209 return false; 3210 auto PerFile = PerDevice->second.find(FileID); 3211 if (PerFile == PerDevice->second.end()) 3212 return false; 3213 auto PerParentName = PerFile->second.find(ParentName); 3214 if (PerParentName == PerFile->second.end()) 3215 return false; 3216 auto PerLine = PerParentName->second.find(LineNum); 3217 if (PerLine == PerParentName->second.end()) 3218 return false; 3219 // Fail if this entry is already registered. 3220 if (PerLine->second.getAddress() || PerLine->second.getID()) 3221 return false; 3222 return true; 3223 } 3224 3225 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3226 const OffloadTargetRegionEntryInfoActTy &Action) { 3227 // Scan all target region entries and perform the provided action. 3228 for (auto &D : OffloadEntriesTargetRegion) 3229 for (auto &F : D.second) 3230 for (auto &P : F.second) 3231 for (auto &L : P.second) 3232 Action(D.first, F.first, P.first(), L.first, L.second); 3233 } 3234 3235 /// \brief Create a Ctor/Dtor-like function whose body is emitted through 3236 /// \a Codegen. 
/// This is used to emit the two functions that register and
/// unregister the descriptor of the current compilation unit.
///
/// The created function returns void and takes a single void* parameter.
/// \param Name Name given to the created function.
/// \param Codegen Callback that emits the body of the function.
/// \return The created function.
static llvm::Function *
createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
                                         const RegionCodeGenTy &Codegen) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
  Args.push_back(&DummyPtr);

  CodeGenFunction CGF(CGM);
  auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto FTy = CGM.getTypes().GetFunctionType(FI);
  auto *Fn =
      CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
  Codegen(CGF);
  CGF.FinishFunction();
  return Fn;
}

/// \brief Build the offloading binary descriptor of this compilation unit and
/// the function that registers it with the offloading runtime.
///
/// \return The registration function, or nullptr when device code is being
/// generated or there are no offload entries.
llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {

  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  auto &M = CGM.getModule();
  auto &C = CGM.getContext();

  // Get list of devices we care about
  auto &Devices = CGM.getLangOpts().OMPTargetTriples;

  // We should be creating an offloading descriptor only if there are devices
  // specified.
  assert(!Devices.empty() && "No OpenMP offloading devices??");

  // Create the external variables that will point to the begin and end of the
  // host entries section. These will be defined by the linker.
  auto *OffloadEntryTy =
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
  llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      ".omp_offloading.entries_begin");
  llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      ".omp_offloading.entries_end");

  // Create all device images
  auto *DeviceImageTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
  ConstantInitBuilder DeviceImagesBuilder(CGM);
  auto DeviceImagesEntries = DeviceImagesBuilder.beginArray(DeviceImageTy);

  // One __tgt_device_image per target triple; the image start/end symbols are
  // external and named after the triple.
  for (unsigned i = 0; i < Devices.size(); ++i) {
    StringRef T = Devices[i].getTriple();
    auto *ImgBegin = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
        /*Initializer=*/nullptr,
        Twine(".omp_offloading.img_start.") + Twine(T));
    auto *ImgEnd = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
        /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T));

    auto Dev = DeviceImagesEntries.beginStruct(DeviceImageTy);
    Dev.add(ImgBegin);
    Dev.add(ImgEnd);
    Dev.add(HostEntriesBegin);
    Dev.add(HostEntriesEnd);
    Dev.finishAndAddTo(DeviceImagesEntries);
  }

  // Create device images global array.
  llvm::GlobalVariable *DeviceImages =
    DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images",
                                              CGM.getPointerAlign(),
                                              /*isConstant=*/true);
  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // This is a Zero array to be used in the creation of the constant expressions
  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
                             llvm::Constant::getNullValue(CGM.Int32Ty)};

  // Create the target region descriptor.
  auto *BinaryDescriptorTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
  ConstantInitBuilder DescBuilder(CGM);
  auto DescInit = DescBuilder.beginStruct(BinaryDescriptorTy);
  DescInit.addInt(CGM.Int32Ty, Devices.size());
  // Pointer to the first element of the device images array.
  DescInit.add(llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
                                                    DeviceImages,
                                                    Index));
  DescInit.add(HostEntriesBegin);
  DescInit.add(HostEntriesEnd);

  auto *Desc = DescInit.finishAndCreateGlobal(".omp_offloading.descriptor",
                                              CGM.getPointerAlign(),
                                              /*isConstant=*/true);

  // Emit code to register or unregister the descriptor at execution
  // startup or closing, respectively.

  // Create a variable to drive the registration and unregistration of the
  // descriptor, so we can reuse the logic that emits Ctors and Dtors.
  auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
  ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
                                IdentInfo, C.CharTy, ImplicitParamDecl::Other);

  // The unregistration function is created first because the registration
  // function refers to it when registering the global destructor.
  auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
      CGM, ".omp_offloading.descriptor_unreg",
      [&](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
                             Desc);
      });
  auto *RegFn = createOffloadingBinaryDescriptorFunction(
      CGM, ".omp_offloading.descriptor_reg",
      [&](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
                             Desc);
        CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
      });
  if (CGM.supportsCOMDAT()) {
    // It is sufficient to call registration function only once, so create a
    // COMDAT group for registration/unregistration functions and associated
    // data. That would reduce startup time and code size. Registration
    // function serves as a COMDAT group key.
    auto ComdatKey = M.getOrInsertComdat(RegFn->getName());
    RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
    RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
    RegFn->setComdat(ComdatKey);
    UnRegFn->setComdat(ComdatKey);
    DeviceImages->setComdat(ComdatKey);
    Desc->setComdat(ComdatKey);
  }
  return RegFn;
}

/// \brief Emit one __tgt_offload_entry global into the
/// ".omp_offloading.entries" section.
///
/// \param ID Constant stored (bitcast to void*) in the first field of the
/// entry.
/// \param Addr Constant whose name is stored as the entry name.
/// \param Size Value stored in the size field.
/// \param Flags Value stored in the flags field; the reserved field is 0.
void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID,
                                         llvm::Constant *Addr, uint64_t Size,
                                         int32_t Flags) {
  StringRef Name = Addr->getName();
  auto *TgtOffloadEntryType = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
  llvm::LLVMContext &C = CGM.getModule().getContext();
  llvm::Module &M = CGM.getModule();

  // Make sure the address has the right type.
  // Note that it is the ID, not Addr, that is stored in the entry.
  llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  llvm::GlobalVariable *Str =
      new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
                               llvm::GlobalValue::InternalLinkage, StrPtrInit,
                               ".omp_offloading.entry_name");
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
  llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);

  // We can't have any padding between symbols, so we need to have 1-byte
  // alignment.
  auto Align = CharUnits::fromQuantity(1);

  // Create the entry struct.
  ConstantInitBuilder EntryBuilder(CGM);
  auto EntryInit = EntryBuilder.beginStruct(TgtOffloadEntryType);
  EntryInit.add(AddrPtr);
  EntryInit.add(StrPtr);
  EntryInit.addInt(CGM.SizeTy, Size);
  EntryInit.addInt(CGM.Int32Ty, Flags);
  EntryInit.addInt(CGM.Int32Ty, 0);
  llvm::GlobalVariable *Entry =
    EntryInit.finishAndCreateGlobal(".omp_offloading.entry",
                                    Align,
                                    /*constant*/ true,
                                    llvm::GlobalValue::ExternalLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection(".omp_offloading.entries");
}

/// \brief Emit the "omp_offload.info" named metadata and the offload entry
/// globals for all recorded target region entries.
///
/// Does nothing when no entries are recorded. Entry globals are emitted in
/// the order given by each entry's order value.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we do not have entries, we don't need to do anything.
  if (OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Indexed by entry order; filled in while the metadata is emitted.
  SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
      OrderedEntries(OffloadEntriesInfoManager.size());

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Auxiliary methods to create metadata values and strings.
  auto getMDInt = [&](unsigned v) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
  };

  auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter = [&](
      unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
      OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
    llvm::SmallVector<llvm::Metadata *, 32> Ops;
    // Generate metadata for target regions. Each entry of this metadata
    // contains:
    // - Entry 0 -> Kind of this type of metadata (0).
    // - Entry 1 -> Device ID of the file where the entry was identified.
    // - Entry 2 -> File ID of the file where the entry was identified.
    // - Entry 3 -> Mangled name of the function where the entry was identified.
    // - Entry 4 -> Line in the file where the entry was identified.
    // - Entry 5 -> Order the entry was created.
    // The first element of the metadata node is the kind.
    Ops.push_back(getMDInt(E.getKind()));
    Ops.push_back(getMDInt(DeviceID));
    Ops.push_back(getMDInt(FileID));
    Ops.push_back(getMDString(ParentName));
    Ops.push_back(getMDInt(Line));
    Ops.push_back(getMDInt(E.getOrder()));

    // Save this entry in the right position of the ordered entries array.
    OrderedEntries[E.getOrder()] = &E;

    // Add metadata to the named metadata node.
    MD->addOperand(llvm::MDNode::get(C, Ops));
  };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Emit the entry globals in creation order.
  for (auto *E : OrderedEntries) {
    assert(E && "All ordered entries must exist!");
    if (auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                E)) {
      assert(CE->getID() && CE->getAddress() &&
             "Entry ID and Addr are invalid!");
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0);
    } else
      llvm_unreachable("Unsupported entry kind.");
  }
}

/// \brief Loads all the offload entries information from the host IR
/// metadata.
///
/// Only acts when generating device code and a host IR file is set. Returns
/// silently, without a diagnostic from this function, if the file cannot be
/// read, the bitcode cannot be parsed, or the module has no
/// "omp_offload.info" metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (Buf.getError())
    return;

  // The host module is parsed into its own, function-local LLVM context.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (ME.getError())
    return;

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (auto I : MD->operands()) {
    llvm::MDNode *MN = cast<llvm::MDNode>(I);

    // Read operand Idx of the current node as a zero-extended integer.
    auto getMDInt = [&](unsigned Idx) {
      llvm::ConstantAsMetadata *V =
          cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    // Read operand Idx of the current node as a string.
    auto getMDString = [&](unsigned Idx) {
      llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operands depend on it.
    switch (getMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OFFLOAD_ENTRY_INFO_TARGET_REGION:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
          /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
          /*Order=*/getMDInt(5));
      break;
    }
  }
}

/// \brief Build the kmp_routine_entry_t pointer type on first use.
///
/// Sets KmpRoutineEntryPtrQTy and KmpRoutineEntryPtrTy; later calls do
/// nothing.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    auto &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

/// \brief Append an unnamed, public, non-mutable field of type \a FieldTy to
/// the declaration context \a DC and return it.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

/// \brief Return the __tgt_offload_entry record type, building and caching
/// it on first use.
QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {

  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void      *addr;       // Pointer to the offload entry info.
  //                          // (function or global)
  //   char      *name;       // Name of the function or global.
  //   size_t     size;       // Size of the entry info (0 if it a function).
  //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
  //   int32_t    reserved;   // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

/// \brief Return the __tgt_device_image record type, building and caching it
/// on first use.
QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
  // These are the types we need to build:
  // struct __tgt_device_image{
  // void   *ImageStart;       // Pointer to the target code start.
  // void   *ImageEnd;         // Pointer to the target code end.
  // // We also add the host entries to the device image, as it may be useful
  // // for the target runtime to have access to that information.
  // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
  //                                       // the entries.
  // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
  //                                       // entries (non inclusive).
  // };
  if (TgtDeviceImageQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    auto *RD = C.buildImplicitRecord("__tgt_device_image");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtDeviceImageQTy = C.getRecordType(RD);
  }
  return TgtDeviceImageQTy;
}

/// \brief Return the __tgt_bin_desc record type, building and caching it on
/// first use.
QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
  // struct __tgt_bin_desc{
  //   int32_t              NumDevices;      // Number of devices supported.
  //   __tgt_device_image   *DeviceImages;   // Arrays of device images
  //                                         // (one per device).
  //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
  //                                         // entries.
  //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
  //                                         // entries (non inclusive).
  // };
  if (TgtBinaryDescriptorQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
    RD->startDefinition();
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtBinaryDescriptorQTy = C.getRecordType(RD);
  }
  return TgtBinaryDescriptorQTy;
}

namespace {
/// Declarations associated with one private variable of a task.
struct PrivateHelpersTy {
  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
                   const VarDecl *PrivateElemInit)
      : Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  /// Original variable; its type determines the type of the privates field.
  const VarDecl *Original;
  /// Private copy of the variable.
  const VarDecl *PrivateCopy;
  /// Helper variable for element-wise initialization; may be null.
  const VarDecl *PrivateElemInit;
};
/// A private variable description paired with an alignment.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

/// Build the implicit record ".kmp_privates.t" with one field per entry of
/// \a Privates, in the given order. Each field has the non-reference type of
/// the original variable and receives its AlignedAttr attributes.
/// \return The record, or nullptr when \a Privates is empty.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    auto &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    auto *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (auto &&Pair : Privates) {
      auto *VD = Pair.second.Original;
      auto Type = VD->getType();
      Type = Type.getNonReferenceType();
      auto *FD = addFieldToRecordDecl(C, RD, Type);
      // Carry over alignment attributes from the original variable.
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

/// Build the implicit record "kmp_task_t". The field order must match the
/// KmpTaskTFields indexes; the taskloop-only fields are added only when
/// \a Kind is a taskloop directive.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  auto &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  // kmp_cmplrdata_t is a union of a kmp_int32 and a routine entry pointer.
  auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  auto *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

/// Build the implicit record "kmp_task_t_with_privates": the task data
/// followed by the privates record. The privates field is omitted when
/// \a Privates is empty.
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  auto &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  }
  RD->completeDefinition();
  return RD;
}

/// \brief Emit a proxy function which accepts kmp_task_t as the second
/// argument.
3760 /// \code 3761 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3762 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3763 /// For taskloops: 3764 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3765 /// tt->reductions, tt->shareds); 3766 /// return 0; 3767 /// } 3768 /// \endcode 3769 static llvm::Value * 3770 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3771 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3772 QualType KmpTaskTWithPrivatesPtrQTy, 3773 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3774 QualType SharedsPtrTy, llvm::Value *TaskFunction, 3775 llvm::Value *TaskPrivatesMap) { 3776 auto &C = CGM.getContext(); 3777 FunctionArgList Args; 3778 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3779 ImplicitParamDecl::Other); 3780 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3781 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3782 ImplicitParamDecl::Other); 3783 Args.push_back(&GtidArg); 3784 Args.push_back(&TaskTypeArg); 3785 auto &TaskEntryFnInfo = 3786 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3787 auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3788 auto *TaskEntry = 3789 llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage, 3790 ".omp_task_entry.", &CGM.getModule()); 3791 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo); 3792 CodeGenFunction CGF(CGM); 3793 CGF.disableDebugInfo(); 3794 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); 3795 3796 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3797 // tt, 3798 // For taskloops: 3799 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3800 // tt->task_data.shareds); 3801 auto *GtidParam = CGF.EmitLoadOfScalar( 3802 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, 
Loc); 3803 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3804 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3805 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3806 auto *KmpTaskTWithPrivatesQTyRD = 3807 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3808 LValue Base = 3809 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3810 auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3811 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3812 auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3813 auto *PartidParam = PartIdLVal.getPointer(); 3814 3815 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3816 auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3817 auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3818 CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(), 3819 CGF.ConvertTypeForMem(SharedsPtrTy)); 3820 3821 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3822 llvm::Value *PrivatesParam; 3823 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3824 auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3825 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3826 PrivatesLVal.getPointer(), CGF.VoidPtrTy); 3827 } else 3828 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3829 3830 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3831 TaskPrivatesMap, 3832 CGF.Builder 3833 .CreatePointerBitCastOrAddrSpaceCast( 3834 TDBase.getAddress(), CGF.VoidPtrTy) 3835 .getPointer()}; 3836 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3837 std::end(CommonArgs)); 3838 if (isOpenMPTaskLoopDirective(Kind)) { 3839 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3840 auto LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3841 auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal(); 3842 auto UBFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3843 auto UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3844 auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal(); 3845 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3846 auto StLVal = CGF.EmitLValueForField(Base, *StFI); 3847 auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal(); 3848 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3849 auto LILVal = CGF.EmitLValueForField(Base, *LIFI); 3850 auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal(); 3851 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3852 auto RLVal = CGF.EmitLValueForField(Base, *RFI); 3853 auto *RParam = CGF.EmitLoadOfLValue(RLVal, Loc).getScalarVal(); 3854 CallArgs.push_back(LBParam); 3855 CallArgs.push_back(UBParam); 3856 CallArgs.push_back(StParam); 3857 CallArgs.push_back(LIParam); 3858 CallArgs.push_back(RParam); 3859 } 3860 CallArgs.push_back(SharedsParam); 3861 3862 CGF.EmitCallOrInvoke(TaskFunction, CallArgs); 3863 CGF.EmitStoreThroughLValue( 3864 RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3865 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3866 CGF.FinishFunction(); 3867 return TaskEntry; 3868 } 3869 3870 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3871 SourceLocation Loc, 3872 QualType KmpInt32Ty, 3873 QualType KmpTaskTWithPrivatesPtrQTy, 3874 QualType KmpTaskTWithPrivatesQTy) { 3875 auto &C = CGM.getContext(); 3876 FunctionArgList Args; 3877 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3878 ImplicitParamDecl::Other); 3879 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3880 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3881 ImplicitParamDecl::Other); 3882 Args.push_back(&GtidArg); 3883 Args.push_back(&TaskTypeArg); 3884 FunctionType::ExtInfo Info; 3885 auto &DestructorFnInfo = 3886 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 
3887 auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo); 3888 auto *DestructorFn = 3889 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3890 ".omp_task_destructor.", &CGM.getModule()); 3891 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn, 3892 DestructorFnInfo); 3893 CodeGenFunction CGF(CGM); 3894 CGF.disableDebugInfo(); 3895 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3896 Args); 3897 3898 LValue Base = CGF.EmitLoadOfPointerLValue( 3899 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3900 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3901 auto *KmpTaskTWithPrivatesQTyRD = 3902 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3903 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3904 Base = CGF.EmitLValueForField(Base, *FI); 3905 for (auto *Field : 3906 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3907 if (auto DtorKind = Field->getType().isDestructedType()) { 3908 auto FieldLValue = CGF.EmitLValueForField(Base, Field); 3909 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 3910 } 3911 } 3912 CGF.FinishFunction(); 3913 return DestructorFn; 3914 } 3915 3916 /// \brief Emit a privates mapping function for correct handling of private and 3917 /// firstprivate variables. 3918 /// \code 3919 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 3920 /// **noalias priv1,..., <tyn> **noalias privn) { 3921 /// *priv1 = &.privates.priv1; 3922 /// ...; 3923 /// *privn = &.privates.privn; 3924 /// } 3925 /// \endcode 3926 static llvm::Value * 3927 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3928 ArrayRef<const Expr *> PrivateVars, 3929 ArrayRef<const Expr *> FirstprivateVars, 3930 ArrayRef<const Expr *> LastprivateVars, 3931 QualType PrivatesQTy, 3932 ArrayRef<PrivateDataTy> Privates) { 3933 auto &C = CGM.getContext(); 3934 FunctionArgList Args; 3935 ImplicitParamDecl TaskPrivatesArg( 3936 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3937 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3938 ImplicitParamDecl::Other); 3939 Args.push_back(&TaskPrivatesArg); 3940 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 3941 unsigned Counter = 1; 3942 for (auto *E: PrivateVars) { 3943 Args.push_back(ImplicitParamDecl::Create( 3944 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3945 C.getPointerType(C.getPointerType(E->getType())) 3946 .withConst() 3947 .withRestrict(), 3948 ImplicitParamDecl::Other)); 3949 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3950 PrivateVarsPos[VD] = Counter; 3951 ++Counter; 3952 } 3953 for (auto *E : FirstprivateVars) { 3954 Args.push_back(ImplicitParamDecl::Create( 3955 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3956 C.getPointerType(C.getPointerType(E->getType())) 3957 .withConst() 3958 .withRestrict(), 3959 ImplicitParamDecl::Other)); 3960 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3961 PrivateVarsPos[VD] = Counter; 3962 ++Counter; 3963 } 3964 for (auto *E: LastprivateVars) { 3965 Args.push_back(ImplicitParamDecl::Create( 3966 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3967 C.getPointerType(C.getPointerType(E->getType())) 3968 .withConst() 3969 .withRestrict(), 3970 ImplicitParamDecl::Other)); 3971 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3972 PrivateVarsPos[VD] = Counter; 3973 ++Counter; 3974 } 
3975 auto &TaskPrivatesMapFnInfo = 3976 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3977 auto *TaskPrivatesMapTy = 3978 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3979 auto *TaskPrivatesMap = llvm::Function::Create( 3980 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, 3981 ".omp_task_privates_map.", &CGM.getModule()); 3982 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap, 3983 TaskPrivatesMapFnInfo); 3984 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3985 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3986 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3987 CodeGenFunction CGF(CGM); 3988 CGF.disableDebugInfo(); 3989 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3990 TaskPrivatesMapFnInfo, Args); 3991 3992 // *privi = &.privates.privi; 3993 LValue Base = CGF.EmitLoadOfPointerLValue( 3994 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3995 TaskPrivatesArg.getType()->castAs<PointerType>()); 3996 auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3997 Counter = 0; 3998 for (auto *Field : PrivatesQTyRD->fields()) { 3999 auto FieldLVal = CGF.EmitLValueForField(Base, Field); 4000 auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4001 auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4002 auto RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4003 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 4004 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 4005 ++Counter; 4006 } 4007 CGF.FinishFunction(); 4008 return TaskPrivatesMap; 4009 } 4010 4011 static int array_pod_sort_comparator(const PrivateDataTy *P1, 4012 const PrivateDataTy *P2) { 4013 return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0); 4014 } 4015 4016 /// Emit initialization for private variables in task-based directives. 
/// Walks \a Privates in lock-step with the fields of the privates record (the
/// second field of \a KmpTaskTWithPrivatesQTyRD) and emits the initializer of
/// every private copy that has one.
/// \param KmpTaskSharedsPtr Address of the shareds block; it is only read when
/// \a Data.FirstprivateVars is not empty.
/// \param TDBase LValue of the task record that embeds the privates record.
/// \param ForDup If true, only copies initialized by a non-trivial constructor
/// call are emitted; otherwise every copy with an initializer is emitted.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  auto &C = CGF.getContext();
  // The privates record is the second field of the task-with-privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // SrcBase stays default-constructed unless there are firstprivates; it is
  // only dereferenced below for entries that carry a PrivateElemInit.
  LValue SrcBase;
  if (!Data.FirstprivateVars.empty()) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
      cast<CapturedStmt>(*D.getAssociatedStmt()));
  // From here on FI iterates over the fields of the privates record itself.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (auto &&Pair : Privates) {
    auto *VD = Pair.second.PrivateCopy;
    auto *Init = VD->getAnyInitializer();
    // For the dup function only non-trivial constructor calls are re-run.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (auto *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: the initializer reads the original value through Elem,
        // which is remapped to the captured field inside the shareds block.
        auto *OriginalVD = Pair.second.Original;
        auto *SharedField = CapturesInfo.lookup(OriginalVD);
        auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
        // Rebuild the lvalue with the alignment of the original declaration.
        SharedRefLValue = CGF.MakeAddrLValue(
            Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
            SharedRefLValue.getType(),
            LValueBaseInfo(AlignmentSource::Decl,
                           SharedRefLValue.getBaseInfo().getMayAlias()));
        QualType Type = OriginalVD->getType();
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
                                    SharedRefLValue.getAddress(), Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: map Elem to the shared original and emit
          // the initializer directly into the private copy.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else
        // No element initializer: the initializer does not depend on the
        // original variable.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
    }
    // Advance unconditionally so fields stay aligned with Privates entries.
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
4097 static bool checkInitIsRequired(CodeGenFunction &CGF, 4098 ArrayRef<PrivateDataTy> Privates) { 4099 bool InitRequired = false; 4100 for (auto &&Pair : Privates) { 4101 auto *VD = Pair.second.PrivateCopy; 4102 auto *Init = VD->getAnyInitializer(); 4103 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4104 !CGF.isTrivialInitializer(Init)); 4105 } 4106 return InitRequired; 4107 } 4108 4109 4110 /// Emit task_dup function (for initialization of 4111 /// private/firstprivate/lastprivate vars and last_iter flag) 4112 /// \code 4113 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4114 /// lastpriv) { 4115 /// // setup lastprivate flag 4116 /// task_dst->last = lastpriv; 4117 /// // could be constructor calls here... 4118 /// } 4119 /// \endcode 4120 static llvm::Value * 4121 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4122 const OMPExecutableDirective &D, 4123 QualType KmpTaskTWithPrivatesPtrQTy, 4124 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4125 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4126 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4127 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4128 auto &C = CGM.getContext(); 4129 FunctionArgList Args; 4130 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4131 KmpTaskTWithPrivatesPtrQTy, 4132 ImplicitParamDecl::Other); 4133 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4134 KmpTaskTWithPrivatesPtrQTy, 4135 ImplicitParamDecl::Other); 4136 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4137 ImplicitParamDecl::Other); 4138 Args.push_back(&DstArg); 4139 Args.push_back(&SrcArg); 4140 Args.push_back(&LastprivArg); 4141 auto &TaskDupFnInfo = 4142 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4143 auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 4144 auto *TaskDup = 4145 llvm::Function::Create(TaskDupTy, 
llvm::GlobalValue::InternalLinkage, 4146 ".omp_task_dup.", &CGM.getModule()); 4147 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo); 4148 CodeGenFunction CGF(CGM); 4149 CGF.disableDebugInfo(); 4150 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args); 4151 4152 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4153 CGF.GetAddrOfLocalVar(&DstArg), 4154 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4155 // task_dst->liter = lastpriv; 4156 if (WithLastIter) { 4157 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4158 LValue Base = CGF.EmitLValueForField( 4159 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4160 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4161 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4162 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4163 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4164 } 4165 4166 // Emit initial values for private copies (if any). 4167 assert(!Privates.empty()); 4168 Address KmpTaskSharedsPtr = Address::invalid(); 4169 if (!Data.FirstprivateVars.empty()) { 4170 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4171 CGF.GetAddrOfLocalVar(&SrcArg), 4172 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4173 LValue Base = CGF.EmitLValueForField( 4174 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4175 KmpTaskSharedsPtr = Address( 4176 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4177 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4178 KmpTaskTShareds)), 4179 Loc), 4180 CGF.getNaturalTypeAlignment(SharedsTy)); 4181 } 4182 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4183 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4184 CGF.FinishFunction(); 4185 return TaskDup; 4186 } 4187 4188 /// Checks if destructor function is required to be generated. 4189 /// \return true if cleanups are required, false otherwise. 
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
  bool NeedsCleanup = false;
  // The privates record is the second field of the task-with-privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
  // Stop at the first private field whose type needs destruction.
  for (auto *FD : PrivateRD->fields()) {
    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
    if (NeedsCleanup)
      break;
  }
  return NeedsCleanup;
}

// Allocates and fills in a new task object for the directive \a D:
//  - collects private/firstprivate/lastprivate entries and sorts them with
//    array_pod_sort_comparator;
//  - builds the kmp_task_t types and the per-task record with privates;
//  - emits the privates mapping function and the proxy task entry;
//  - calls __kmpc_omp_task_alloc, copies the shareds and initializes privates;
//  - stores the destructors function and the priority, when required.
// The returned TaskResultTy carries the values the callers in this file use to
// finish emitting the task or taskloop call.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Value *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  auto &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // Each *Vars list is walked in parallel with its matching *Copies list.
  auto I = Data.PrivateCopies.begin();
  for (auto *E : Data.PrivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  // Firstprivates additionally carry the helper variable used by their
  // initializer expression.
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (auto *E : Data.FirstprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (auto *E : Data.LastprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  llvm::array_pod_sort(Privates.begin(), Privates.end(),
                       array_pod_sort_comparator);
  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (KmpTaskTQTy.isNull()) {
    KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
        CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
  }
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  auto *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
  auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The mapping function is passed as the fourth parameter (index 3) of the
  // outlined task function, so take the expected type from there.
  auto *TaskPrivatesMapTy =
      std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null mapping function.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  auto *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  // Flags known at compile time are accumulated in 'Flags'; the 'final' flag
  // may depend on a runtime condition and is merged in below.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // Data.Final holds either a runtime condition (pointer part) or a constant
  // (int part).
  auto *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  auto *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      NewTask, KmpTaskTWithPrivatesPtrTy);
  // Base addresses the whole record with privates, TDBase its leading
  // kmp_task_t field.
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops need a dup function when there are lastprivates or when some
    // private copy has a non-trivial constructor.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

// Emits the runtime calls for a 'task' directive: allocates the task through
// emitTaskInit, materializes the array of kmp_depend_info records for the
// 'depend' clauses, and then either enqueues the task (__kmpc_omp_task or
// __kmpc_omp_task_with_deps) or, on the false branch of an 'if' clause, runs
// the task entry immediately between __kmpc_omp_task_begin_if0 and
// __kmpc_omp_task_complete_if0.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Value *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Value *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  auto &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
    // Field order of the implicit kmp_depend_info record built below.
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    // The flags field is an unsigned integer as wide as 'bool'.
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Build the kmp_depend_info record once and cache it in KmpDependInfoTy.
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned i = 0; i < NumDependencies; ++i) {
      const Expr *E = Data.Dependences[i].second;
      auto Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: the length is the byte distance between the address
        // of E and one element past the section's upper-bound element.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else
        Size = CGF.getTypeSize(Ty);
      auto Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      auto BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      auto LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[i].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      auto FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // From here on DependenciesArray is the address of the first record cast
    // to 'void *', as passed to the runtime.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
        CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  auto *ThreadID = getThreadID(CGF, Loc);
  auto *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Only filled in (and only read by ThenCodeGen) when there are dependences.
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Deferred path: hand the task over to the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
                        &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied task: store 0 into the part_id field before enqueueing.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // Only filled in (and only read by ElseCodeGen) when there are dependences.
  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Undeferred path (the 'if' clause evaluated to false): wait for the
  // dependences and call the task entry directly.
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF,
                                                           PrePostActionTy &) {
    auto &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy](
        CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  // Without an 'if' clause the task is always handed over to the runtime.
  if (IfCond)
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

// Emits the runtime call for a 'taskloop' directive: allocates the task
// through emitTaskInit, stores the initial lower bound, upper bound, stride
// and reductions pointer into the task record, and calls __kmpc_taskloop.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Value *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    // NOTE(review): the cast is signed, so if EvaluateExprAsBool yields an i1
    // a true condition becomes -1 rather than the 1 used in the else branch;
    // confirm the runtime only tests if_val against zero.
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);

  // Initialize the lower bound, upper bound and stride fields of the task
  // record from the initializers of the directive's helper variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions)
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // Values of the 'sched' argument. Data.Schedule carries the schedule
  // expression in its pointer part; its int part selects NumTasks over
  // Grainsize.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
      llvm::ConstantInt::getNullValue(
          CGF.IntTy), // Always 0 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}

/// \brief Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
4665 static void EmitOMPAggregateReduction( 4666 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 4667 const VarDecl *RHSVar, 4668 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 4669 const Expr *, const Expr *)> &RedOpGen, 4670 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 4671 const Expr *UpExpr = nullptr) { 4672 // Perform element-by-element initialization. 4673 QualType ElementTy; 4674 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 4675 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 4676 4677 // Drill down to the base element type on both arrays. 4678 auto ArrayTy = Type->getAsArrayTypeUnsafe(); 4679 auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 4680 4681 auto RHSBegin = RHSAddr.getPointer(); 4682 auto LHSBegin = LHSAddr.getPointer(); 4683 // Cast from pointer to array type to pointer to single element. 4684 auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 4685 // The basic structure here is a while-do loop. 4686 auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 4687 auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 4688 auto IsEmpty = 4689 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 4690 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 4691 4692 // Enter the loop body, making that address the current address. 
4693 auto EntryBB = CGF.Builder.GetInsertBlock(); 4694 CGF.EmitBlock(BodyBB); 4695 4696 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 4697 4698 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 4699 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 4700 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 4701 Address RHSElementCurrent = 4702 Address(RHSElementPHI, 4703 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 4704 4705 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 4706 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 4707 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 4708 Address LHSElementCurrent = 4709 Address(LHSElementPHI, 4710 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 4711 4712 // Emit copy. 4713 CodeGenFunction::OMPPrivateScope Scope(CGF); 4714 Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; }); 4715 Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; }); 4716 Scope.Privatize(); 4717 RedOpGen(CGF, XExpr, EExpr, UpExpr); 4718 Scope.ForceCleanup(); 4719 4720 // Shift the address forward by one element. 4721 auto LHSElementNext = CGF.Builder.CreateConstGEP1_32( 4722 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 4723 auto RHSElementNext = CGF.Builder.CreateConstGEP1_32( 4724 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 4725 // Check whether we've reached the end. 4726 auto Done = 4727 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 4728 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 4729 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 4730 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 4731 4732 // Done. 4733 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 4734 } 4735 4736 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 4737 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 4738 /// UDR combiner function. 4739 static void emitReductionCombiner(CodeGenFunction &CGF, 4740 const Expr *ReductionOp) { 4741 if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) 4742 if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 4743 if (auto *DRE = 4744 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 4745 if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 4746 std::pair<llvm::Function *, llvm::Function *> Reduction = 4747 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 4748 RValue Func = RValue::get(Reduction.first); 4749 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 4750 CGF.EmitIgnoredExpr(ReductionOp); 4751 return; 4752 } 4753 CGF.EmitIgnoredExpr(ReductionOp); 4754 } 4755 4756 llvm::Value *CGOpenMPRuntime::emitReductionFunction( 4757 CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 4758 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 4759 ArrayRef<const Expr *> ReductionOps) { 4760 auto &C = CGM.getContext(); 4761 4762 // void reduction_func(void *LHSArg, void *RHSArg); 4763 FunctionArgList Args; 4764 ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); 4765 ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); 4766 Args.push_back(&LHSArg); 4767 Args.push_back(&RHSArg); 4768 auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4769 auto *Fn = llvm::Function::Create( 4770 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 4771 ".omp.reduction.reduction_func", &CGM.getModule()); 4772 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); 4773 CodeGenFunction CGF(CGM); 4774 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 4775 4776 // Dst = (void*[n])(LHSArg); 4777 // Src = (void*[n])(RHSArg); 4778 Address 
LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4779 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 4780 ArgsType), CGF.getPointerAlign()); 4781 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4782 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 4783 ArgsType), CGF.getPointerAlign()); 4784 4785 // ... 4786 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 4787 // ... 4788 CodeGenFunction::OMPPrivateScope Scope(CGF); 4789 auto IPriv = Privates.begin(); 4790 unsigned Idx = 0; 4791 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 4792 auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 4793 Scope.addPrivate(RHSVar, [&]() -> Address { 4794 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 4795 }); 4796 auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 4797 Scope.addPrivate(LHSVar, [&]() -> Address { 4798 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 4799 }); 4800 QualType PrivTy = (*IPriv)->getType(); 4801 if (PrivTy->isVariablyModifiedType()) { 4802 // Get array size and emit VLA type. 4803 ++Idx; 4804 Address Elem = 4805 CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize()); 4806 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 4807 auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy); 4808 auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 4809 CodeGenFunction::OpaqueValueMapping OpaqueMap( 4810 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 4811 CGF.EmitVariablyModifiedType(PrivTy); 4812 } 4813 } 4814 Scope.Privatize(); 4815 IPriv = Privates.begin(); 4816 auto ILHS = LHSExprs.begin(); 4817 auto IRHS = RHSExprs.begin(); 4818 for (auto *E : ReductionOps) { 4819 if ((*IPriv)->getType()->isArrayType()) { 4820 // Emit reduction for array section. 
4821 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 4822 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 4823 EmitOMPAggregateReduction( 4824 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 4825 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 4826 emitReductionCombiner(CGF, E); 4827 }); 4828 } else 4829 // Emit reduction for array subscript or single variable. 4830 emitReductionCombiner(CGF, E); 4831 ++IPriv; 4832 ++ILHS; 4833 ++IRHS; 4834 } 4835 Scope.ForceCleanup(); 4836 CGF.FinishFunction(); 4837 return Fn; 4838 } 4839 4840 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 4841 const Expr *ReductionOp, 4842 const Expr *PrivateRef, 4843 const DeclRefExpr *LHS, 4844 const DeclRefExpr *RHS) { 4845 if (PrivateRef->getType()->isArrayType()) { 4846 // Emit reduction for array section. 4847 auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 4848 auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 4849 EmitOMPAggregateReduction( 4850 CGF, PrivateRef->getType(), LHSVar, RHSVar, 4851 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 4852 emitReductionCombiner(CGF, ReductionOp); 4853 }); 4854 } else 4855 // Emit reduction for array subscript or single variable. 
4856 emitReductionCombiner(CGF, ReductionOp); 4857 } 4858 4859 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 4860 ArrayRef<const Expr *> Privates, 4861 ArrayRef<const Expr *> LHSExprs, 4862 ArrayRef<const Expr *> RHSExprs, 4863 ArrayRef<const Expr *> ReductionOps, 4864 ReductionOptionsTy Options) { 4865 if (!CGF.HaveInsertPoint()) 4866 return; 4867 4868 bool WithNowait = Options.WithNowait; 4869 bool SimpleReduction = Options.SimpleReduction; 4870 4871 // Next code should be emitted for reduction: 4872 // 4873 // static kmp_critical_name lock = { 0 }; 4874 // 4875 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 4876 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 4877 // ... 4878 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 4879 // *(Type<n>-1*)rhs[<n>-1]); 4880 // } 4881 // 4882 // ... 4883 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 4884 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 4885 // RedList, reduce_func, &<lock>)) { 4886 // case 1: 4887 // ... 4888 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 4889 // ... 4890 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 4891 // break; 4892 // case 2: 4893 // ... 4894 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 4895 // ... 4896 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 4897 // break; 4898 // default:; 4899 // } 4900 // 4901 // if SimpleReduction is true, only the next code is generated: 4902 // ... 4903 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 4904 // ... 
4905 4906 auto &C = CGM.getContext(); 4907 4908 if (SimpleReduction) { 4909 CodeGenFunction::RunCleanupsScope Scope(CGF); 4910 auto IPriv = Privates.begin(); 4911 auto ILHS = LHSExprs.begin(); 4912 auto IRHS = RHSExprs.begin(); 4913 for (auto *E : ReductionOps) { 4914 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 4915 cast<DeclRefExpr>(*IRHS)); 4916 ++IPriv; 4917 ++ILHS; 4918 ++IRHS; 4919 } 4920 return; 4921 } 4922 4923 // 1. Build a list of reduction variables. 4924 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 4925 auto Size = RHSExprs.size(); 4926 for (auto *E : Privates) { 4927 if (E->getType()->isVariablyModifiedType()) 4928 // Reserve place for array size. 4929 ++Size; 4930 } 4931 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 4932 QualType ReductionArrayTy = 4933 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 4934 /*IndexTypeQuals=*/0); 4935 Address ReductionList = 4936 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 4937 auto IPriv = Privates.begin(); 4938 unsigned Idx = 0; 4939 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 4940 Address Elem = 4941 CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize()); 4942 CGF.Builder.CreateStore( 4943 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4944 CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), 4945 Elem); 4946 if ((*IPriv)->getType()->isVariablyModifiedType()) { 4947 // Store array size. 4948 ++Idx; 4949 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, 4950 CGF.getPointerSize()); 4951 llvm::Value *Size = CGF.Builder.CreateIntCast( 4952 CGF.getVLASize( 4953 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 4954 .first, 4955 CGF.SizeTy, /*isSigned=*/false); 4956 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 4957 Elem); 4958 } 4959 } 4960 4961 // 2. Emit reduce_func(). 
4962 auto *ReductionFn = emitReductionFunction( 4963 CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 4964 LHSExprs, RHSExprs, ReductionOps); 4965 4966 // 3. Create static kmp_critical_name lock = { 0 }; 4967 auto *Lock = getCriticalRegionLock(".reduction"); 4968 4969 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 4970 // RedList, reduce_func, &<lock>); 4971 auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 4972 auto *ThreadId = getThreadID(CGF, Loc); 4973 auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 4974 auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4975 ReductionList.getPointer(), CGF.VoidPtrTy); 4976 llvm::Value *Args[] = { 4977 IdentTLoc, // ident_t *<loc> 4978 ThreadId, // i32 <gtid> 4979 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 4980 ReductionArrayTySize, // size_type sizeof(RedList) 4981 RL, // void *RedList 4982 ReductionFn, // void (*) (void *, void *) <reduce_func> 4983 Lock // kmp_critical_name *&<lock> 4984 }; 4985 auto Res = CGF.EmitRuntimeCall( 4986 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 4987 : OMPRTL__kmpc_reduce), 4988 Args); 4989 4990 // 5. Build switch(res) 4991 auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 4992 auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 4993 4994 // 6. Build case 1: 4995 // ... 4996 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 4997 // ... 
4998 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 4999 // break; 5000 auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5001 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5002 CGF.EmitBlock(Case1BB); 5003 5004 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5005 llvm::Value *EndArgs[] = { 5006 IdentTLoc, // ident_t *<loc> 5007 ThreadId, // i32 <gtid> 5008 Lock // kmp_critical_name *&<lock> 5009 }; 5010 auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps]( 5011 CodeGenFunction &CGF, PrePostActionTy &Action) { 5012 auto &RT = CGF.CGM.getOpenMPRuntime(); 5013 auto IPriv = Privates.begin(); 5014 auto ILHS = LHSExprs.begin(); 5015 auto IRHS = RHSExprs.begin(); 5016 for (auto *E : ReductionOps) { 5017 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5018 cast<DeclRefExpr>(*IRHS)); 5019 ++IPriv; 5020 ++ILHS; 5021 ++IRHS; 5022 } 5023 }; 5024 RegionCodeGenTy RCG(CodeGen); 5025 CommonActionTy Action( 5026 nullptr, llvm::None, 5027 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait 5028 : OMPRTL__kmpc_end_reduce), 5029 EndArgs); 5030 RCG.setAction(Action); 5031 RCG(CGF); 5032 5033 CGF.EmitBranch(DefaultBB); 5034 5035 // 7. Build case 2: 5036 // ... 5037 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5038 // ... 
5039 // break; 5040 auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5041 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5042 CGF.EmitBlock(Case2BB); 5043 5044 auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps]( 5045 CodeGenFunction &CGF, PrePostActionTy &Action) { 5046 auto ILHS = LHSExprs.begin(); 5047 auto IRHS = RHSExprs.begin(); 5048 auto IPriv = Privates.begin(); 5049 for (auto *E : ReductionOps) { 5050 const Expr *XExpr = nullptr; 5051 const Expr *EExpr = nullptr; 5052 const Expr *UpExpr = nullptr; 5053 BinaryOperatorKind BO = BO_Comma; 5054 if (auto *BO = dyn_cast<BinaryOperator>(E)) { 5055 if (BO->getOpcode() == BO_Assign) { 5056 XExpr = BO->getLHS(); 5057 UpExpr = BO->getRHS(); 5058 } 5059 } 5060 // Try to emit update expression as a simple atomic. 5061 auto *RHSExpr = UpExpr; 5062 if (RHSExpr) { 5063 // Analyze RHS part of the whole expression. 5064 if (auto *ACO = dyn_cast<AbstractConditionalOperator>( 5065 RHSExpr->IgnoreParenImpCasts())) { 5066 // If this is a conditional operator, analyze its condition for 5067 // min/max reduction operator. 
5068 RHSExpr = ACO->getCond(); 5069 } 5070 if (auto *BORHS = 5071 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5072 EExpr = BORHS->getRHS(); 5073 BO = BORHS->getOpcode(); 5074 } 5075 } 5076 if (XExpr) { 5077 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5078 auto &&AtomicRedGen = [BO, VD, 5079 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5080 const Expr *EExpr, const Expr *UpExpr) { 5081 LValue X = CGF.EmitLValue(XExpr); 5082 RValue E; 5083 if (EExpr) 5084 E = CGF.EmitAnyExpr(EExpr); 5085 CGF.EmitOMPAtomicSimpleUpdateExpr( 5086 X, E, BO, /*IsXLHSInRHSPart=*/true, 5087 llvm::AtomicOrdering::Monotonic, Loc, 5088 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5089 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5090 PrivateScope.addPrivate( 5091 VD, [&CGF, VD, XRValue, Loc]() -> Address { 5092 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5093 CGF.emitOMPSimpleStore( 5094 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5095 VD->getType().getNonReferenceType(), Loc); 5096 return LHSTemp; 5097 }); 5098 (void)PrivateScope.Privatize(); 5099 return CGF.EmitAnyExpr(UpExpr); 5100 }); 5101 }; 5102 if ((*IPriv)->getType()->isArrayType()) { 5103 // Emit atomic reduction for array section. 5104 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5105 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5106 AtomicRedGen, XExpr, EExpr, UpExpr); 5107 } else 5108 // Emit atomic reduction for array subscript or single variable. 5109 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5110 } else { 5111 // Emit as a critical region. 
5112 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5113 const Expr *, const Expr *) { 5114 auto &RT = CGF.CGM.getOpenMPRuntime(); 5115 RT.emitCriticalRegion( 5116 CGF, ".atomic_reduction", 5117 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5118 Action.Enter(CGF); 5119 emitReductionCombiner(CGF, E); 5120 }, 5121 Loc); 5122 }; 5123 if ((*IPriv)->getType()->isArrayType()) { 5124 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5125 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5126 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5127 CritRedGen); 5128 } else 5129 CritRedGen(CGF, nullptr, nullptr, nullptr); 5130 } 5131 ++ILHS; 5132 ++IRHS; 5133 ++IPriv; 5134 } 5135 }; 5136 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5137 if (!WithNowait) { 5138 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5139 llvm::Value *EndArgs[] = { 5140 IdentTLoc, // ident_t *<loc> 5141 ThreadId, // i32 <gtid> 5142 Lock // kmp_critical_name *&<lock> 5143 }; 5144 CommonActionTy Action(nullptr, llvm::None, 5145 createRuntimeFunction(OMPRTL__kmpc_end_reduce), 5146 EndArgs); 5147 AtomicRCG.setAction(Action); 5148 AtomicRCG(CGF); 5149 } else 5150 AtomicRCG(CGF); 5151 5152 CGF.EmitBranch(DefaultBB); 5153 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5154 } 5155 5156 /// Generates unique name for artificial threadprivate variables. 5157 /// Format is: <Prefix> "." <Loc_raw_encoding> "_" <N> 5158 static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc, 5159 unsigned N) { 5160 SmallString<256> Buffer; 5161 llvm::raw_svector_ostream Out(Buffer); 5162 Out << Prefix << "." 
<< Loc.getRawEncoding() << "_" << N; 5163 return Out.str(); 5164 } 5165 5166 /// Emits reduction initializer function: 5167 /// \code 5168 /// void @.red_init(void* %arg) { 5169 /// %0 = bitcast void* %arg to <type>* 5170 /// store <type> <init>, <type>* %0 5171 /// ret void 5172 /// } 5173 /// \endcode 5174 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5175 SourceLocation Loc, 5176 ReductionCodeGen &RCG, unsigned N) { 5177 auto &C = CGM.getContext(); 5178 FunctionArgList Args; 5179 ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other); 5180 Args.emplace_back(&Param); 5181 auto &FnInfo = 5182 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5183 auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5184 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5185 ".red_init.", &CGM.getModule()); 5186 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); 5187 CodeGenFunction CGF(CGM); 5188 CGF.disableDebugInfo(); 5189 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); 5190 Address PrivateAddr = CGF.EmitLoadOfPointer( 5191 CGF.GetAddrOfLocalVar(&Param), 5192 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5193 llvm::Value *Size = nullptr; 5194 // If the size of the reduction item is non-constant, load it from global 5195 // threadprivate variable. 
5196 if (RCG.getSizes(N).second) { 5197 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5198 CGF, CGM.getContext().getSizeType(), 5199 generateUniqueName("reduction_size", Loc, N)); 5200 Size = 5201 CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5202 CGM.getContext().getSizeType(), SourceLocation()); 5203 } 5204 RCG.emitAggregateType(CGF, N, Size); 5205 LValue SharedLVal; 5206 // If initializer uses initializer from declare reduction construct, emit a 5207 // pointer to the address of the original reduction item (reuired by reduction 5208 // initializer) 5209 if (RCG.usesReductionInitializer(N)) { 5210 Address SharedAddr = 5211 CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5212 CGF, CGM.getContext().VoidPtrTy, 5213 generateUniqueName("reduction", Loc, N)); 5214 SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 5215 } else { 5216 SharedLVal = CGF.MakeNaturalAlignAddrLValue( 5217 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 5218 CGM.getContext().VoidPtrTy); 5219 } 5220 // Emit the initializer: 5221 // %0 = bitcast void* %arg to <type>* 5222 // store <type> <init>, <type>* %0 5223 RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal, 5224 [](CodeGenFunction &) { return false; }); 5225 CGF.FinishFunction(); 5226 return Fn; 5227 } 5228 5229 /// Emits reduction combiner function: 5230 /// \code 5231 /// void @.red_comb(void* %arg0, void* %arg1) { 5232 /// %lhs = bitcast void* %arg0 to <type>* 5233 /// %rhs = bitcast void* %arg1 to <type>* 5234 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 5235 /// store <type> %2, <type>* %lhs 5236 /// ret void 5237 /// } 5238 /// \endcode 5239 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 5240 SourceLocation Loc, 5241 ReductionCodeGen &RCG, unsigned N, 5242 const Expr *ReductionOp, 5243 const Expr *LHS, const Expr *RHS, 5244 const Expr *PrivateRef) { 5245 auto &C = CGM.getContext(); 5246 auto *LHSVD = 
cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 5247 auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 5248 FunctionArgList Args; 5249 ImplicitParamDecl ParamInOut(C, C.VoidPtrTy, ImplicitParamDecl::Other); 5250 ImplicitParamDecl ParamIn(C, C.VoidPtrTy, ImplicitParamDecl::Other); 5251 Args.emplace_back(&ParamInOut); 5252 Args.emplace_back(&ParamIn); 5253 auto &FnInfo = 5254 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5255 auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5256 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5257 ".red_comb.", &CGM.getModule()); 5258 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); 5259 CodeGenFunction CGF(CGM); 5260 CGF.disableDebugInfo(); 5261 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); 5262 llvm::Value *Size = nullptr; 5263 // If the size of the reduction item is non-constant, load it from global 5264 // threadprivate variable. 5265 if (RCG.getSizes(N).second) { 5266 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5267 CGF, CGM.getContext().getSizeType(), 5268 generateUniqueName("reduction_size", Loc, N)); 5269 Size = 5270 CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5271 CGM.getContext().getSizeType(), SourceLocation()); 5272 } 5273 RCG.emitAggregateType(CGF, N, Size); 5274 // Remap lhs and rhs variables to the addresses of the function arguments. 5275 // %lhs = bitcast void* %arg0 to <type>* 5276 // %rhs = bitcast void* %arg1 to <type>* 5277 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5278 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() -> Address { 5279 // Pull out the pointer to the variable. 
5280 Address PtrAddr = CGF.EmitLoadOfPointer( 5281 CGF.GetAddrOfLocalVar(&ParamInOut), 5282 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5283 return CGF.Builder.CreateElementBitCast( 5284 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 5285 }); 5286 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() -> Address { 5287 // Pull out the pointer to the variable. 5288 Address PtrAddr = CGF.EmitLoadOfPointer( 5289 CGF.GetAddrOfLocalVar(&ParamIn), 5290 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5291 return CGF.Builder.CreateElementBitCast( 5292 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 5293 }); 5294 PrivateScope.Privatize(); 5295 // Emit the combiner body: 5296 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 5297 // store <type> %2, <type>* %lhs 5298 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 5299 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 5300 cast<DeclRefExpr>(RHS)); 5301 CGF.FinishFunction(); 5302 return Fn; 5303 } 5304 5305 /// Emits reduction finalizer function: 5306 /// \code 5307 /// void @.red_fini(void* %arg) { 5308 /// %0 = bitcast void* %arg to <type>* 5309 /// <destroy>(<type>* %0) 5310 /// ret void 5311 /// } 5312 /// \endcode 5313 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 5314 SourceLocation Loc, 5315 ReductionCodeGen &RCG, unsigned N) { 5316 if (!RCG.needCleanups(N)) 5317 return nullptr; 5318 auto &C = CGM.getContext(); 5319 FunctionArgList Args; 5320 ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other); 5321 Args.emplace_back(&Param); 5322 auto &FnInfo = 5323 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5324 auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5325 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5326 ".red_fini.", &CGM.getModule()); 5327 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); 5328 CodeGenFunction CGF(CGM); 5329 CGF.disableDebugInfo(); 5330 CGF.StartFunction(GlobalDecl(), 
C.VoidTy, Fn, FnInfo, Args); 5331 Address PrivateAddr = CGF.EmitLoadOfPointer( 5332 CGF.GetAddrOfLocalVar(&Param), 5333 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5334 llvm::Value *Size = nullptr; 5335 // If the size of the reduction item is non-constant, load it from global 5336 // threadprivate variable. 5337 if (RCG.getSizes(N).second) { 5338 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5339 CGF, CGM.getContext().getSizeType(), 5340 generateUniqueName("reduction_size", Loc, N)); 5341 Size = 5342 CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5343 CGM.getContext().getSizeType(), SourceLocation()); 5344 } 5345 RCG.emitAggregateType(CGF, N, Size); 5346 // Emit the finalizer body: 5347 // <destroy>(<type>* %0) 5348 RCG.emitCleanups(CGF, N, PrivateAddr); 5349 CGF.FinishFunction(); 5350 return Fn; 5351 } 5352 5353 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 5354 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 5355 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 5356 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 5357 return nullptr; 5358 5359 // Build typedef struct: 5360 // kmp_task_red_input { 5361 // void *reduce_shar; // shared reduction item 5362 // size_t reduce_size; // size of data item 5363 // void *reduce_init; // data initialization routine 5364 // void *reduce_fini; // data finalization routine 5365 // void *reduce_comb; // data combiner routine 5366 // kmp_task_red_flags_t flags; // flags for additional info from compiler 5367 // } kmp_task_red_input_t; 5368 ASTContext &C = CGM.getContext(); 5369 auto *RD = C.buildImplicitRecord("kmp_task_red_input_t"); 5370 RD->startDefinition(); 5371 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5372 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 5373 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5374 const FieldDecl *FiniFD = 
addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5375 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5376 const FieldDecl *FlagsFD = addFieldToRecordDecl( 5377 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 5378 RD->completeDefinition(); 5379 QualType RDType = C.getRecordType(RD); 5380 unsigned Size = Data.ReductionVars.size(); 5381 llvm::APInt ArraySize(/*numBits=*/64, Size); 5382 QualType ArrayRDType = C.getConstantArrayType( 5383 RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); 5384 // kmp_task_red_input_t .rd_input.[Size]; 5385 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 5386 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, 5387 Data.ReductionOps); 5388 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 5389 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 5390 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 5391 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 5392 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 5393 TaskRedInput.getPointer(), Idxs, 5394 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 5395 ".rd_input.gep."); 5396 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 5397 // ElemLVal.reduce_shar = &Shareds[Cnt]; 5398 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 5399 RCG.emitSharedLValue(CGF, Cnt); 5400 llvm::Value *CastedShared = 5401 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer()); 5402 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 5403 RCG.emitAggregateType(CGF, Cnt); 5404 llvm::Value *SizeValInChars; 5405 llvm::Value *SizeVal; 5406 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 5407 // We use delayed creation/initialization for VLAs, array sections and 5408 // custom reduction initializations. 
It is required because runtime does not 5409 // provide the way to pass the sizes of VLAs/array sections to 5410 // initializer/combiner/finalizer functions and does not pass the pointer to 5411 // original reduction item to the initializer. Instead threadprivate global 5412 // variables are used to store these values and use them in the functions. 5413 bool DelayedCreation = !!SizeVal; 5414 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 5415 /*isSigned=*/false); 5416 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 5417 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 5418 // ElemLVal.reduce_init = init; 5419 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 5420 llvm::Value *InitAddr = 5421 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 5422 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 5423 DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt); 5424 // ElemLVal.reduce_fini = fini; 5425 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 5426 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 5427 llvm::Value *FiniAddr = Fini 5428 ? 
CGF.EmitCastToVoidPtr(Fini) 5429 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 5430 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 5431 // ElemLVal.reduce_comb = comb; 5432 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 5433 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 5434 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 5435 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 5436 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 5437 // ElemLVal.flags = 0; 5438 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 5439 if (DelayedCreation) { 5440 CGF.EmitStoreOfScalar( 5441 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true), 5442 FlagsLVal); 5443 } else 5444 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType()); 5445 } 5446 // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void 5447 // *data); 5448 llvm::Value *Args[] = { 5449 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 5450 /*isSigned=*/true), 5451 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 5452 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 5453 CGM.VoidPtrTy)}; 5454 return CGF.EmitRuntimeCall( 5455 createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args); 5456 } 5457 5458 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 5459 SourceLocation Loc, 5460 ReductionCodeGen &RCG, 5461 unsigned N) { 5462 auto Sizes = RCG.getSizes(N); 5463 // Emit threadprivate global variable if the type is non-constant 5464 // (Sizes.second = nullptr). 
5465 if (Sizes.second) { 5466 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 5467 /*isSigned=*/false); 5468 Address SizeAddr = getAddrOfArtificialThreadPrivate( 5469 CGF, CGM.getContext().getSizeType(), 5470 generateUniqueName("reduction_size", Loc, N)); 5471 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 5472 } 5473 // Store address of the original reduction item if custom initializer is used. 5474 if (RCG.usesReductionInitializer(N)) { 5475 Address SharedAddr = getAddrOfArtificialThreadPrivate( 5476 CGF, CGM.getContext().VoidPtrTy, 5477 generateUniqueName("reduction", Loc, N)); 5478 CGF.Builder.CreateStore( 5479 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5480 RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy), 5481 SharedAddr, /*IsVolatile=*/false); 5482 } 5483 } 5484 5485 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 5486 SourceLocation Loc, 5487 llvm::Value *ReductionsPtr, 5488 LValue SharedLVal) { 5489 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 5490 // *d); 5491 llvm::Value *Args[] = { 5492 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 5493 /*isSigned=*/true), 5494 ReductionsPtr, 5495 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(), 5496 CGM.VoidPtrTy)}; 5497 return Address( 5498 CGF.EmitRuntimeCall( 5499 createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), 5500 SharedLVal.getAlignment()); 5501 } 5502 5503 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 5504 SourceLocation Loc) { 5505 if (!CGF.HaveInsertPoint()) 5506 return; 5507 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 5508 // global_tid); 5509 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 5510 // Ignore return result until untied tasks are supported. 
5511 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 5512 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5513 Region->emitUntiedSwitch(CGF); 5514 } 5515 5516 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 5517 OpenMPDirectiveKind InnerKind, 5518 const RegionCodeGenTy &CodeGen, 5519 bool HasCancel) { 5520 if (!CGF.HaveInsertPoint()) 5521 return; 5522 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 5523 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 5524 } 5525 5526 namespace { 5527 enum RTCancelKind { 5528 CancelNoreq = 0, 5529 CancelParallel = 1, 5530 CancelLoop = 2, 5531 CancelSections = 3, 5532 CancelTaskgroup = 4 5533 }; 5534 } // anonymous namespace 5535 5536 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 5537 RTCancelKind CancelKind = CancelNoreq; 5538 if (CancelRegion == OMPD_parallel) 5539 CancelKind = CancelParallel; 5540 else if (CancelRegion == OMPD_for) 5541 CancelKind = CancelLoop; 5542 else if (CancelRegion == OMPD_sections) 5543 CancelKind = CancelSections; 5544 else { 5545 assert(CancelRegion == OMPD_taskgroup); 5546 CancelKind = CancelTaskgroup; 5547 } 5548 return CancelKind; 5549 } 5550 5551 void CGOpenMPRuntime::emitCancellationPointCall( 5552 CodeGenFunction &CGF, SourceLocation Loc, 5553 OpenMPDirectiveKind CancelRegion) { 5554 if (!CGF.HaveInsertPoint()) 5555 return; 5556 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 5557 // global_tid, kmp_int32 cncl_kind); 5558 if (auto *OMPRegionInfo = 5559 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 5560 // For 'cancellation point taskgroup', the task region info may not have a 5561 // cancel. This may instead happen in another adjacent task. 
5562 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 5563 llvm::Value *Args[] = { 5564 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 5565 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 5566 // Ignore return result until untied tasks are supported. 5567 auto *Result = CGF.EmitRuntimeCall( 5568 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 5569 // if (__kmpc_cancellationpoint()) { 5570 // exit from construct; 5571 // } 5572 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 5573 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 5574 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 5575 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 5576 CGF.EmitBlock(ExitBB); 5577 // exit from construct; 5578 auto CancelDest = 5579 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 5580 CGF.EmitBranchThroughCleanup(CancelDest); 5581 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 5582 } 5583 } 5584 } 5585 5586 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 5587 const Expr *IfCond, 5588 OpenMPDirectiveKind CancelRegion) { 5589 if (!CGF.HaveInsertPoint()) 5590 return; 5591 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 5592 // kmp_int32 cncl_kind); 5593 if (auto *OMPRegionInfo = 5594 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 5595 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 5596 PrePostActionTy &) { 5597 auto &RT = CGF.CGM.getOpenMPRuntime(); 5598 llvm::Value *Args[] = { 5599 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 5600 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 5601 // Ignore return result until untied tasks are supported. 
5602 auto *Result = CGF.EmitRuntimeCall( 5603 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 5604 // if (__kmpc_cancel()) { 5605 // exit from construct; 5606 // } 5607 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 5608 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 5609 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 5610 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 5611 CGF.EmitBlock(ExitBB); 5612 // exit from construct; 5613 auto CancelDest = 5614 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 5615 CGF.EmitBranchThroughCleanup(CancelDest); 5616 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 5617 }; 5618 if (IfCond) 5619 emitOMPIfClause(CGF, IfCond, ThenGen, 5620 [](CodeGenFunction &, PrePostActionTy &) {}); 5621 else { 5622 RegionCodeGenTy ThenRCG(ThenGen); 5623 ThenRCG(CGF); 5624 } 5625 } 5626 } 5627 5628 /// \brief Obtain information that uniquely identifies a target entry. This 5629 /// consists of the file and device IDs as well as line number associated with 5630 /// the relevant entry source location. 
5631 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 5632 unsigned &DeviceID, unsigned &FileID, 5633 unsigned &LineNum) { 5634 5635 auto &SM = C.getSourceManager(); 5636 5637 // The loc should be always valid and have a file ID (the user cannot use 5638 // #pragma directives in macros) 5639 5640 assert(Loc.isValid() && "Source location is expected to be always valid."); 5641 assert(Loc.isFileID() && "Source location is expected to refer to a file."); 5642 5643 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 5644 assert(PLoc.isValid() && "Source location is expected to be always valid."); 5645 5646 llvm::sys::fs::UniqueID ID; 5647 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 5648 llvm_unreachable("Source file with target region no longer exists!"); 5649 5650 DeviceID = ID.getDevice(); 5651 FileID = ID.getFile(); 5652 LineNum = PLoc.getLine(); 5653 } 5654 5655 void CGOpenMPRuntime::emitTargetOutlinedFunction( 5656 const OMPExecutableDirective &D, StringRef ParentName, 5657 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 5658 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 5659 assert(!ParentName.empty() && "Invalid target region parent name!"); 5660 5661 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 5662 IsOffloadEntry, CodeGen); 5663 } 5664 5665 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 5666 const OMPExecutableDirective &D, StringRef ParentName, 5667 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 5668 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 5669 // Create a unique name for the entry function using the source location 5670 // information of the current target region. 
The name will be something like: 5671 // 5672 // __omp_offloading_DD_FFFF_PP_lBB 5673 // 5674 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 5675 // mangled name of the function that encloses the target region and BB is the 5676 // line number of the target region. 5677 5678 unsigned DeviceID; 5679 unsigned FileID; 5680 unsigned Line; 5681 getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID, 5682 Line); 5683 SmallString<64> EntryFnName; 5684 { 5685 llvm::raw_svector_ostream OS(EntryFnName); 5686 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 5687 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 5688 } 5689 5690 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 5691 5692 CodeGenFunction CGF(CGM, true); 5693 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 5694 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 5695 5696 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); 5697 5698 // If this target outline function is not an offload entry, we don't need to 5699 // register it. 5700 if (!IsOffloadEntry) 5701 return; 5702 5703 // The target region ID is used by the runtime library to identify the current 5704 // target region, so it only has to be unique and not necessarily point to 5705 // anything. It could be the pointer to the outlined function that implements 5706 // the target region, but we aren't using that so that the compiler doesn't 5707 // need to keep that, and could therefore inline the host function if proven 5708 // worthwhile during optimization. In the other hand, if emitting code for the 5709 // device, the ID has to be the function address so that it can retrieved from 5710 // the offloading entry and launched by the runtime library. We also mark the 5711 // outlined function to have external linkage in case we are emitting code for 5712 // the device, because these functions will be entry points to the device. 
5713 5714 if (CGM.getLangOpts().OpenMPIsDevice) { 5715 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 5716 OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage); 5717 } else 5718 OutlinedFnID = new llvm::GlobalVariable( 5719 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 5720 llvm::GlobalValue::PrivateLinkage, 5721 llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id"); 5722 5723 // Register the information for the entry associated with this target region. 5724 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 5725 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 5726 /*Flags=*/0); 5727 } 5728 5729 /// discard all CompoundStmts intervening between two constructs 5730 static const Stmt *ignoreCompoundStmts(const Stmt *Body) { 5731 while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body)) 5732 Body = CS->body_front(); 5733 5734 return Body; 5735 } 5736 5737 /// Emit the number of teams for a target directive. Inspect the num_teams 5738 /// clause associated with a teams construct combined or closely nested 5739 /// with the target directive. 5740 /// 5741 /// Emit a team of size one for directives such as 'target parallel' that 5742 /// have no associated teams construct. 5743 /// 5744 /// Otherwise, return nullptr. 5745 static llvm::Value * 5746 emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, 5747 CodeGenFunction &CGF, 5748 const OMPExecutableDirective &D) { 5749 5750 assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " 5751 "teams directive expected to be " 5752 "emitted only for the host!"); 5753 5754 auto &Bld = CGF.Builder; 5755 5756 // If the target directive is combined with a teams directive: 5757 // Return the value in the num_teams clause, if any. 5758 // Otherwise, return 0 to denote the runtime default. 
5759 if (isOpenMPTeamsDirective(D.getDirectiveKind())) { 5760 if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) { 5761 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 5762 auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(), 5763 /*IgnoreResultAssign*/ true); 5764 return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, 5765 /*IsSigned=*/true); 5766 } 5767 5768 // The default value is 0. 5769 return Bld.getInt32(0); 5770 } 5771 5772 // If the target directive is combined with a parallel directive but not a 5773 // teams directive, start one team. 5774 if (isOpenMPParallelDirective(D.getDirectiveKind())) 5775 return Bld.getInt32(1); 5776 5777 // If the current target region has a teams region enclosed, we need to get 5778 // the number of teams to pass to the runtime function call. This is done 5779 // by generating the expression in a inlined region. This is required because 5780 // the expression is captured in the enclosing target environment when the 5781 // teams directive is not combined with target. 5782 5783 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 5784 5785 // FIXME: Accommodate other combined directives with teams when they become 5786 // available. 5787 if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>( 5788 ignoreCompoundStmts(CS.getCapturedStmt()))) { 5789 if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { 5790 CGOpenMPInnerExprInfo CGInfo(CGF, CS); 5791 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 5792 llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); 5793 return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, 5794 /*IsSigned=*/true); 5795 } 5796 5797 // If we have an enclosed teams directive but no num_teams clause we use 5798 // the default value 0. 5799 return Bld.getInt32(0); 5800 } 5801 5802 // No teams associated with the directive. 5803 return nullptr; 5804 } 5805 5806 /// Emit the number of threads for a target directive. 
Inspect the 5807 /// thread_limit clause associated with a teams construct combined or closely 5808 /// nested with the target directive. 5809 /// 5810 /// Emit the num_threads clause for directives such as 'target parallel' that 5811 /// have no associated teams construct. 5812 /// 5813 /// Otherwise, return nullptr. 5814 static llvm::Value * 5815 emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, 5816 CodeGenFunction &CGF, 5817 const OMPExecutableDirective &D) { 5818 5819 assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " 5820 "teams directive expected to be " 5821 "emitted only for the host!"); 5822 5823 auto &Bld = CGF.Builder; 5824 5825 // 5826 // If the target directive is combined with a teams directive: 5827 // Return the value in the thread_limit clause, if any. 5828 // 5829 // If the target directive is combined with a parallel directive: 5830 // Return the value in the num_threads clause, if any. 5831 // 5832 // If both clauses are set, select the minimum of the two. 5833 // 5834 // If neither teams or parallel combined directives set the number of threads 5835 // in a team, return 0 to denote the runtime default. 5836 // 5837 // If this is not a teams directive return nullptr. 
5838 5839 if (isOpenMPTeamsDirective(D.getDirectiveKind()) || 5840 isOpenMPParallelDirective(D.getDirectiveKind())) { 5841 llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0); 5842 llvm::Value *NumThreadsVal = nullptr; 5843 llvm::Value *ThreadLimitVal = nullptr; 5844 5845 if (const auto *ThreadLimitClause = 5846 D.getSingleClause<OMPThreadLimitClause>()) { 5847 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 5848 auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(), 5849 /*IgnoreResultAssign*/ true); 5850 ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, 5851 /*IsSigned=*/true); 5852 } 5853 5854 if (const auto *NumThreadsClause = 5855 D.getSingleClause<OMPNumThreadsClause>()) { 5856 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 5857 llvm::Value *NumThreads = 5858 CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), 5859 /*IgnoreResultAssign*/ true); 5860 NumThreadsVal = 5861 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true); 5862 } 5863 5864 // Select the lesser of thread_limit and num_threads. 5865 if (NumThreadsVal) 5866 ThreadLimitVal = ThreadLimitVal 5867 ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal, 5868 ThreadLimitVal), 5869 NumThreadsVal, ThreadLimitVal) 5870 : NumThreadsVal; 5871 5872 // Set default value passed to the runtime if either teams or a target 5873 // parallel type directive is found but no clause is specified. 5874 if (!ThreadLimitVal) 5875 ThreadLimitVal = DefaultThreadLimitVal; 5876 5877 return ThreadLimitVal; 5878 } 5879 5880 // If the current target region has a teams region enclosed, we need to get 5881 // the thread limit to pass to the runtime function call. This is done 5882 // by generating the expression in a inlined region. This is required because 5883 // the expression is captured in the enclosing target environment when the 5884 // teams directive is not combined with target. 
5885 5886 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 5887 5888 // FIXME: Accommodate other combined directives with teams when they become 5889 // available. 5890 if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>( 5891 ignoreCompoundStmts(CS.getCapturedStmt()))) { 5892 if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { 5893 CGOpenMPInnerExprInfo CGInfo(CGF, CS); 5894 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 5895 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); 5896 return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, 5897 /*IsSigned=*/true); 5898 } 5899 5900 // If we have an enclosed teams directive but no thread_limit clause we use 5901 // the default value 0. 5902 return CGF.Builder.getInt32(0); 5903 } 5904 5905 // No teams associated with the directive. 5906 return nullptr; 5907 } 5908 5909 namespace { 5910 // \brief Utility to handle information from clauses associated with a given 5911 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 5912 // It provides a convenient interface to obtain the information and generate 5913 // code for that information. 5914 class MappableExprsHandler { 5915 public: 5916 /// \brief Values for bit flags used to specify the mapping type for 5917 /// offloading. 5918 enum OpenMPOffloadMappingFlags { 5919 /// \brief Allocate memory on the device and move data from host to device. 5920 OMP_MAP_TO = 0x01, 5921 /// \brief Allocate memory on the device and move data from device to host. 5922 OMP_MAP_FROM = 0x02, 5923 /// \brief Always perform the requested mapping action on the element, even 5924 /// if it was already mapped before. 5925 OMP_MAP_ALWAYS = 0x04, 5926 /// \brief Delete the element from the device environment, ignoring the 5927 /// current reference count associated with the element. 
5928 OMP_MAP_DELETE = 0x08, 5929 /// \brief The element being mapped is a pointer, therefore the pointee 5930 /// should be mapped as well. 5931 OMP_MAP_IS_PTR = 0x10, 5932 /// \brief This flags signals that an argument is the first one relating to 5933 /// a map/private clause expression. For some cases a single 5934 /// map/privatization results in multiple arguments passed to the runtime 5935 /// library. 5936 OMP_MAP_FIRST_REF = 0x20, 5937 /// \brief Signal that the runtime library has to return the device pointer 5938 /// in the current position for the data being mapped. 5939 OMP_MAP_RETURN_PTR = 0x40, 5940 /// \brief This flag signals that the reference being passed is a pointer to 5941 /// private data. 5942 OMP_MAP_PRIVATE_PTR = 0x80, 5943 /// \brief Pass the element to the device by value. 5944 OMP_MAP_PRIVATE_VAL = 0x100, 5945 }; 5946 5947 /// Class that associates information with a base pointer to be passed to the 5948 /// runtime library. 5949 class BasePointerInfo { 5950 /// The base pointer. 5951 llvm::Value *Ptr = nullptr; 5952 /// The base declaration that refers to this device pointer, or null if 5953 /// there is none. 5954 const ValueDecl *DevPtrDecl = nullptr; 5955 5956 public: 5957 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 5958 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 5959 llvm::Value *operator*() const { return Ptr; } 5960 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 5961 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 5962 }; 5963 5964 typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy; 5965 typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy; 5966 typedef SmallVector<unsigned, 16> MapFlagsArrayTy; 5967 5968 private: 5969 /// \brief Directive from where the map clauses were extracted. 5970 const OMPExecutableDirective &CurDir; 5971 5972 /// \brief Function the directive is being generated for. 
5973 CodeGenFunction &CGF; 5974 5975 /// \brief Set of all first private variables in the current directive. 5976 llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls; 5977 5978 /// Map between device pointer declarations and their expression components. 5979 /// The key value for declarations in 'this' is null. 5980 llvm::DenseMap< 5981 const ValueDecl *, 5982 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 5983 DevPointersMap; 5984 5985 llvm::Value *getExprTypeSize(const Expr *E) const { 5986 auto ExprTy = E->getType().getCanonicalType(); 5987 5988 // Reference types are ignored for mapping purposes. 5989 if (auto *RefTy = ExprTy->getAs<ReferenceType>()) 5990 ExprTy = RefTy->getPointeeType().getCanonicalType(); 5991 5992 // Given that an array section is considered a built-in type, we need to 5993 // do the calculation based on the length of the section instead of relying 5994 // on CGF.getTypeSize(E->getType()). 5995 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 5996 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 5997 OAE->getBase()->IgnoreParenImpCasts()) 5998 .getCanonicalType(); 5999 6000 // If there is no length associated with the expression, that means we 6001 // are using the whole length of the base. 6002 if (!OAE->getLength() && OAE->getColonLoc().isValid()) 6003 return CGF.getTypeSize(BaseTy); 6004 6005 llvm::Value *ElemSize; 6006 if (auto *PTy = BaseTy->getAs<PointerType>()) 6007 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 6008 else { 6009 auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 6010 assert(ATy && "Expecting array type if not a pointer type."); 6011 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 6012 } 6013 6014 // If we don't have a length at this point, that is because we have an 6015 // array section with a single element. 
6016 if (!OAE->getLength()) 6017 return ElemSize; 6018 6019 auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); 6020 LengthVal = 6021 CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); 6022 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 6023 } 6024 return CGF.getTypeSize(ExprTy); 6025 } 6026 6027 /// \brief Return the corresponding bits for a given map clause modifier. Add 6028 /// a flag marking the map as a pointer if requested. Add a flag marking the 6029 /// map as the first one of a series of maps that relate to the same map 6030 /// expression. 6031 unsigned getMapTypeBits(OpenMPMapClauseKind MapType, 6032 OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag, 6033 bool AddIsFirstFlag) const { 6034 unsigned Bits = 0u; 6035 switch (MapType) { 6036 case OMPC_MAP_alloc: 6037 case OMPC_MAP_release: 6038 // alloc and release is the default behavior in the runtime library, i.e. 6039 // if we don't pass any bits alloc/release that is what the runtime is 6040 // going to do. Therefore, we don't need to signal anything for these two 6041 // type modifiers. 6042 break; 6043 case OMPC_MAP_to: 6044 Bits = OMP_MAP_TO; 6045 break; 6046 case OMPC_MAP_from: 6047 Bits = OMP_MAP_FROM; 6048 break; 6049 case OMPC_MAP_tofrom: 6050 Bits = OMP_MAP_TO | OMP_MAP_FROM; 6051 break; 6052 case OMPC_MAP_delete: 6053 Bits = OMP_MAP_DELETE; 6054 break; 6055 default: 6056 llvm_unreachable("Unexpected map type!"); 6057 break; 6058 } 6059 if (AddPtrFlag) 6060 Bits |= OMP_MAP_IS_PTR; 6061 if (AddIsFirstFlag) 6062 Bits |= OMP_MAP_FIRST_REF; 6063 if (MapTypeModifier == OMPC_MAP_always) 6064 Bits |= OMP_MAP_ALWAYS; 6065 return Bits; 6066 } 6067 6068 /// \brief Return true if the provided expression is a final array section. A 6069 /// final array section, is one whose length can't be proved to be one. 
6070 bool isFinalArraySectionExpression(const Expr *E) const { 6071 auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 6072 6073 // It is not an array section and therefore not a unity-size one. 6074 if (!OASE) 6075 return false; 6076 6077 // An array section with no colon always refer to a single element. 6078 if (OASE->getColonLoc().isInvalid()) 6079 return false; 6080 6081 auto *Length = OASE->getLength(); 6082 6083 // If we don't have a length we have to check if the array has size 1 6084 // for this dimension. Also, we should always expect a length if the 6085 // base type is pointer. 6086 if (!Length) { 6087 auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 6088 OASE->getBase()->IgnoreParenImpCasts()) 6089 .getCanonicalType(); 6090 if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 6091 return ATy->getSize().getSExtValue() != 1; 6092 // If we don't have a constant dimension length, we have to consider 6093 // the current section as having any size, so it is not necessarily 6094 // unitary. If it happen to be unity size, that's user fault. 6095 return true; 6096 } 6097 6098 // Check if the length evaluates to 1. 6099 llvm::APSInt ConstLength; 6100 if (!Length->EvaluateAsInt(ConstLength, CGF.getContext())) 6101 return true; // Can have more that size 1. 6102 6103 return ConstLength.getSExtValue() != 1; 6104 } 6105 6106 /// \brief Generate the base pointers, section pointers, sizes and map type 6107 /// bits for the provided map type, map modifier, and expression components. 6108 /// \a IsFirstComponent should be set to true if the provided set of 6109 /// components is the first associated with a capture. 
6110 void generateInfoForComponentList( 6111 OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, 6112 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 6113 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 6114 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 6115 bool IsFirstComponentList) const { 6116 6117 // The following summarizes what has to be generated for each map and the 6118 // types bellow. The generated information is expressed in this order: 6119 // base pointer, section pointer, size, flags 6120 // (to add to the ones that come from the map type and modifier). 6121 // 6122 // double d; 6123 // int i[100]; 6124 // float *p; 6125 // 6126 // struct S1 { 6127 // int i; 6128 // float f[50]; 6129 // } 6130 // struct S2 { 6131 // int i; 6132 // float f[50]; 6133 // S1 s; 6134 // double *p; 6135 // struct S2 *ps; 6136 // } 6137 // S2 s; 6138 // S2 *ps; 6139 // 6140 // map(d) 6141 // &d, &d, sizeof(double), noflags 6142 // 6143 // map(i) 6144 // &i, &i, 100*sizeof(int), noflags 6145 // 6146 // map(i[1:23]) 6147 // &i(=&i[0]), &i[1], 23*sizeof(int), noflags 6148 // 6149 // map(p) 6150 // &p, &p, sizeof(float*), noflags 6151 // 6152 // map(p[1:24]) 6153 // p, &p[1], 24*sizeof(float), noflags 6154 // 6155 // map(s) 6156 // &s, &s, sizeof(S2), noflags 6157 // 6158 // map(s.i) 6159 // &s, &(s.i), sizeof(int), noflags 6160 // 6161 // map(s.s.f) 6162 // &s, &(s.i.f), 50*sizeof(int), noflags 6163 // 6164 // map(s.p) 6165 // &s, &(s.p), sizeof(double*), noflags 6166 // 6167 // map(s.p[:22], s.a s.b) 6168 // &s, &(s.p), sizeof(double*), noflags 6169 // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag 6170 // 6171 // map(s.ps) 6172 // &s, &(s.ps), sizeof(S2*), noflags 6173 // 6174 // map(s.ps->s.i) 6175 // &s, &(s.ps), sizeof(S2*), noflags 6176 // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag 6177 // 6178 // map(s.ps->ps) 6179 // &s, &(s.ps), sizeof(S2*), noflags 6180 // &(s.ps), &(s.ps->ps), 
sizeof(S2*), ptr_flag + extra_flag 6181 // 6182 // map(s.ps->ps->ps) 6183 // &s, &(s.ps), sizeof(S2*), noflags 6184 // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag 6185 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 6186 // 6187 // map(s.ps->ps->s.f[:22]) 6188 // &s, &(s.ps), sizeof(S2*), noflags 6189 // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag 6190 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag 6191 // 6192 // map(ps) 6193 // &ps, &ps, sizeof(S2*), noflags 6194 // 6195 // map(ps->i) 6196 // ps, &(ps->i), sizeof(int), noflags 6197 // 6198 // map(ps->s.f) 6199 // ps, &(ps->s.f[0]), 50*sizeof(float), noflags 6200 // 6201 // map(ps->p) 6202 // ps, &(ps->p), sizeof(double*), noflags 6203 // 6204 // map(ps->p[:22]) 6205 // ps, &(ps->p), sizeof(double*), noflags 6206 // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag 6207 // 6208 // map(ps->ps) 6209 // ps, &(ps->ps), sizeof(S2*), noflags 6210 // 6211 // map(ps->ps->s.i) 6212 // ps, &(ps->ps), sizeof(S2*), noflags 6213 // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag 6214 // 6215 // map(ps->ps->ps) 6216 // ps, &(ps->ps), sizeof(S2*), noflags 6217 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 6218 // 6219 // map(ps->ps->ps->ps) 6220 // ps, &(ps->ps), sizeof(S2*), noflags 6221 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 6222 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 6223 // 6224 // map(ps->ps->ps->s.f[:22]) 6225 // ps, &(ps->ps), sizeof(S2*), noflags 6226 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag 6227 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + 6228 // extra_flag 6229 6230 // Track if the map information being generated is the first for a capture. 6231 bool IsCaptureFirstInfo = IsFirstComponentList; 6232 6233 // Scan the components from the base to the complete expression. 
6234 auto CI = Components.rbegin(); 6235 auto CE = Components.rend(); 6236 auto I = CI; 6237 6238 // Track if the map information being generated is the first for a list of 6239 // components. 6240 bool IsExpressionFirstInfo = true; 6241 llvm::Value *BP = nullptr; 6242 6243 if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) { 6244 // The base is the 'this' pointer. The content of the pointer is going 6245 // to be the base of the field being mapped. 6246 BP = CGF.EmitScalarExpr(ME->getBase()); 6247 } else { 6248 // The base is the reference to the variable. 6249 // BP = &Var. 6250 BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression())) 6251 .getPointer(); 6252 6253 // If the variable is a pointer and is being dereferenced (i.e. is not 6254 // the last component), the base has to be the pointer itself, not its 6255 // reference. References are ignored for mapping purposes. 6256 QualType Ty = 6257 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 6258 if (Ty->isAnyPointerType() && std::next(I) != CE) { 6259 auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty); 6260 BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(), 6261 Ty->castAs<PointerType>()) 6262 .getPointer(); 6263 6264 // We do not need to generate individual map information for the 6265 // pointer, it can be associated with the combined storage. 6266 ++I; 6267 } 6268 } 6269 6270 for (; I != CE; ++I) { 6271 auto Next = std::next(I); 6272 6273 // We need to generate the addresses and sizes if this is the last 6274 // component, if the component is a pointer or if it is an array section 6275 // whose length can't be proved to be one. If this is a pointer, it 6276 // becomes the base address for the following components. 6277 6278 // A final array section, is one whose length can't be proved to be one. 6279 bool IsFinalArraySection = 6280 isFinalArraySectionExpression(I->getAssociatedExpression()); 6281 6282 // Get information on whether the element is a pointer. 
Have to do a 6283 // special treatment for array sections given that they are built-in 6284 // types. 6285 const auto *OASE = 6286 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 6287 bool IsPointer = 6288 (OASE && 6289 OMPArraySectionExpr::getBaseOriginalType(OASE) 6290 .getCanonicalType() 6291 ->isAnyPointerType()) || 6292 I->getAssociatedExpression()->getType()->isAnyPointerType(); 6293 6294 if (Next == CE || IsPointer || IsFinalArraySection) { 6295 6296 // If this is not the last component, we expect the pointer to be 6297 // associated with an array expression or member expression. 6298 assert((Next == CE || 6299 isa<MemberExpr>(Next->getAssociatedExpression()) || 6300 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 6301 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && 6302 "Unexpected expression"); 6303 6304 auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer(); 6305 auto *Size = getExprTypeSize(I->getAssociatedExpression()); 6306 6307 // If we have a member expression and the current component is a 6308 // reference, we have to map the reference too. Whenever we have a 6309 // reference, the section that reference refers to is going to be a 6310 // load instruction from the storage assigned to the reference. 6311 if (isa<MemberExpr>(I->getAssociatedExpression()) && 6312 I->getAssociatedDeclaration()->getType()->isReferenceType()) { 6313 auto *LI = cast<llvm::LoadInst>(LB); 6314 auto *RefAddr = LI->getPointerOperand(); 6315 6316 BasePointers.push_back(BP); 6317 Pointers.push_back(RefAddr); 6318 Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); 6319 Types.push_back(getMapTypeBits( 6320 /*MapType*/ OMPC_MAP_alloc, /*MapTypeModifier=*/OMPC_MAP_unknown, 6321 !IsExpressionFirstInfo, IsCaptureFirstInfo)); 6322 IsExpressionFirstInfo = false; 6323 IsCaptureFirstInfo = false; 6324 // The reference will be the next base address. 
6325 BP = RefAddr; 6326 } 6327 6328 BasePointers.push_back(BP); 6329 Pointers.push_back(LB); 6330 Sizes.push_back(Size); 6331 6332 // We need to add a pointer flag for each map that comes from the 6333 // same expression except for the first one. We also need to signal 6334 // this map is the first one that relates with the current capture 6335 // (there is a set of entries for each capture). 6336 Types.push_back(getMapTypeBits(MapType, MapTypeModifier, 6337 !IsExpressionFirstInfo, 6338 IsCaptureFirstInfo)); 6339 6340 // If we have a final array section, we are done with this expression. 6341 if (IsFinalArraySection) 6342 break; 6343 6344 // The pointer becomes the base for the next element. 6345 if (Next != CE) 6346 BP = LB; 6347 6348 IsExpressionFirstInfo = false; 6349 IsCaptureFirstInfo = false; 6350 continue; 6351 } 6352 } 6353 } 6354 6355 /// \brief Return the adjusted map modifiers if the declaration a capture 6356 /// refers to appears in a first-private clause. This is expected to be used 6357 /// only with directives that start with 'target'. 6358 unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap, 6359 unsigned CurrentModifiers) { 6360 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 6361 6362 // A first private variable captured by reference will use only the 6363 // 'private ptr' and 'map to' flag. Return the right flags if the captured 6364 // declaration is known as first-private in this handler. 6365 if (FirstPrivateDecls.count(Cap.getCapturedVar())) 6366 return MappableExprsHandler::OMP_MAP_PRIVATE_PTR | 6367 MappableExprsHandler::OMP_MAP_TO; 6368 6369 // We didn't modify anything. 6370 return CurrentModifiers; 6371 } 6372 6373 public: 6374 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 6375 : CurDir(Dir), CGF(CGF) { 6376 // Extract firstprivate clause information. 
6377 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 6378 for (const auto *D : C->varlists()) 6379 FirstPrivateDecls.insert( 6380 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); 6381 // Extract device pointer clause information. 6382 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 6383 for (auto L : C->component_lists()) 6384 DevPointersMap[L.first].push_back(L.second); 6385 } 6386 6387 /// \brief Generate all the base pointers, section pointers, sizes and map 6388 /// types for the extracted mappable expressions. Also, for each item that 6389 /// relates with a device pointer, a pair of the relevant declaration and 6390 /// index where it occurs is appended to the device pointers info array. 6391 void generateAllInfo(MapBaseValuesArrayTy &BasePointers, 6392 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 6393 MapFlagsArrayTy &Types) const { 6394 BasePointers.clear(); 6395 Pointers.clear(); 6396 Sizes.clear(); 6397 Types.clear(); 6398 6399 struct MapInfo { 6400 /// Kind that defines how a device pointer has to be returned. 6401 enum ReturnPointerKind { 6402 // Don't have to return any pointer. 6403 RPK_None, 6404 // Pointer is the base of the declaration. 
RPK_Base,
        // Pointer is a member of the base declaration - 'this'
        RPK_Member,
        // Pointer is a reference and a member of the base declaration - 'this'
        RPK_MemberReference,
      };
      // Component list of the mapped expression.
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      // Map type and modifier to use for this component list.
      OpenMPMapClauseKind MapType;
      OpenMPMapClauseKind MapTypeModifier;
      // Whether (and how) a device pointer has to be returned for this entry.
      ReturnPointerKind ReturnDevicePointer;

      MapInfo()
          : MapType(OMPC_MAP_unknown), MapTypeModifier(OMPC_MAP_unknown),
            ReturnDevicePointer(RPK_None) {}
      MapInfo(
          OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
          OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
          ReturnPointerKind ReturnDevicePointer)
          : Components(Components), MapType(MapType),
            MapTypeModifier(MapTypeModifier),
            ReturnDevicePointer(ReturnDevicePointer) {}
    };

    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. Entries are keyed by the canonical declaration, or by null when
    // no declaration is given.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
        MapInfo::ReturnPointerKind ReturnDevicePointer) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].push_back({L, MapType, MapModifier, ReturnDevicePointer});
    };

    // Collect the component lists of the 'map', 'to' and 'from' clauses. The
    // 'to' and 'from' clauses are recorded with the corresponding map type and
    // no modifier.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
      for (auto L : C->component_lists())
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
                MapInfo::RPK_None);
    for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
      for (auto L : C->component_lists())
        InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
                MapInfo::RPK_None);
    for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
      for (auto L : C->component_lists())
        InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
                MapInfo::RPK_None);

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (auto *C : this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>())
      for (auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        auto *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = isa<MemberExpr>(IE)
                                          ? (VD->getType()->isReferenceType()
                                                 ? MapInfo::RPK_MemberReference
                                                 : MapInfo::RPK_Member)
                                          : MapInfo::RPK_Base;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section.
        // FIXME: MSVC 2013 seems to require this-> to find member CGF.
        llvm::Value *Ptr =
            this->CGF
                .EmitLoadOfLValue(this->CGF.EmitLValue(IE), SourceLocation())
                .getScalarVal();
        BasePointers.push_back({Ptr, VD});
        Pointers.push_back(Ptr);
        Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
        Types.push_back(OMP_MAP_RETURN_PTR | OMP_MAP_FIRST_REF);
      }

    for (auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;
      for (MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = BasePointers.size();
        // FIXME: MSVC 2013 seems to require this-> to find the member method.
        this->generateInfoForComponentList(L.MapType, L.MapTypeModifier,
                                           L.Components, BasePointers, Pointers,
                                           Sizes, Types, IsFirstComponentList);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (IsFirstComponentList &&
            L.ReturnDevicePointer != MapInfo::RPK_None) {
          // If the pointer is not the base of the map, we need to skip the
          // base. If it is a reference in a member field, we also need to skip
          // the map of the reference.
          if (L.ReturnDevicePointer != MapInfo::RPK_Base) {
            ++CurrentBasePointersIdx;
            if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference)
              ++CurrentBasePointersIdx;
          }
          assert(BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          auto *RelevantVD = L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PTR;
        }
        IsFirstComponentList = false;
      }
    }
  }

  /// \brief Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture. The output arrays are cleared first.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes,
                              MapFlagsArrayTy &Types) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    BasePointers.clear();
    Pointers.clear();
    Sizes.clear();
    Types.clear();

    // We need to know when we are generating information for the first
    // component associated with a capture, because the mapping flags depend
    // on it.
    bool IsFirstComponentList = true;

    // A capture of 'this' is represented by a null declaration.
    const ValueDecl *VD =
        Cap->capturesThis()
            ? nullptr
            : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl());

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value.
// If it is a reference to a declaration, we just
    // pass its value, otherwise, if it is a member expression, we need to map
    // 'to' the field.
    if (!VD) {
      auto It = DevPointersMap.find(VD);
      if (It != DevPointersMap.end()) {
        for (auto L : It->second) {
          generateInfoForComponentList(
              /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L,
              BasePointers, Pointers, Sizes, Types, IsFirstComponentList);
          IsFirstComponentList = false;
        }
        return;
      }
    } else if (DevPointersMap.count(VD)) {
      // The captured value itself is passed, flagged as a private value.
      BasePointers.push_back({Arg, VD});
      Pointers.push_back(Arg);
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_PRIVATE_VAL | OMP_MAP_FIRST_REF);
      return;
    }

    // Otherwise use the component lists that the map clauses associate with
    // this declaration. If there are none, the output arrays stay empty.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
      for (auto L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(),
                                     L.second, BasePointers, Pointers, Sizes,
                                     Types, IsFirstComponentList);
        IsFirstComponentList = false;
      }

    return;
  }

  /// \brief Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Exactly one entry is appended to each of the output arrays, with \a CV
  /// used as both base pointer and pointer, and the entry is always flagged
  /// as the first reference.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) {

    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this' is mapped 'tofrom' with the size of the pointee type.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_PRIVATE_VAL);
        CurSizes.push_back(CGF.getTypeSize(RI.getType()));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(0u);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
      }
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);

      // The size mapped is that of the referenced type.
      const ReferenceType *PtrTy =
          cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.getTypeSize(ElementType));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(ElementType->isAggregateType()
                                ? (OMP_MAP_TO | OMP_MAP_FROM)
                                : OMP_MAP_TO);

      // If we have a capture by reference we may need to add the private
      // pointer flag if the base declaration shows in some first-private
      // clause.
      CurMapTypes.back() =
          adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back());
    }
    // Every default map produces a single argument, so, it is always the
    // first one.
    CurMapTypes.back() |= OMP_MAP_FIRST_REF;
  }
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// \brief Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// \brief Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  auto &CGM = CGF.CGM;
  auto &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
bool hasRuntimeEvaluationCaptureSize = false;
    for (auto *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    // Both pointer arrays are temporaries of type 'void *[NumberOfPtrs]'.
    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType =
        Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (auto S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, ".offload_sizes");
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, ".offload_maptypes");
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Store every base pointer and pointer (and, when needed, every size) into
    // its slot of the corresponding array.
    for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) {
      llvm::Value *BPVal = *BasePointers[i];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, i);
      // Cast the slot address so that the value can be stored with its own
      // type.
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember the slot of base pointers that carry a device pointer
      // declaration, when the caller asked for that information.
      if (Info.requiresDevicePointerInfo())
        if (auto *DevVD = BasePointers[i].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr));

      llvm::Value *PVal = Pointers[i];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, i);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // The sizes array only has to be filled at runtime when it is not a
      // constant global.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/i);
        Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
/// \brief Emit the arguments to be passed to the runtime library based on the
/// arrays of pointers, sizes and map types.
///
/// When \a Info holds at least one pointer, each output argument is set to the
/// address of the first element of the corresponding array in \a Info;
/// otherwise each output argument is set to a null pointer constant.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
  auto &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int32Ty, Info.NumberOfPtrs),
        Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
  } else {
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
  }
}

void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Value *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device,
                                     ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  auto &Ctx = CGF.getContext();

  // Fill up the arrays with all the captured variables.
6838 MappableExprsHandler::MapValuesArrayTy KernelArgs; 6839 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 6840 MappableExprsHandler::MapValuesArrayTy Pointers; 6841 MappableExprsHandler::MapValuesArrayTy Sizes; 6842 MappableExprsHandler::MapFlagsArrayTy MapTypes; 6843 6844 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 6845 MappableExprsHandler::MapValuesArrayTy CurPointers; 6846 MappableExprsHandler::MapValuesArrayTy CurSizes; 6847 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 6848 6849 // Get mappable expression information. 6850 MappableExprsHandler MEHandler(D, CGF); 6851 6852 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 6853 auto RI = CS.getCapturedRecordDecl()->field_begin(); 6854 auto CV = CapturedVars.begin(); 6855 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 6856 CE = CS.capture_end(); 6857 CI != CE; ++CI, ++RI, ++CV) { 6858 StringRef Name; 6859 QualType Ty; 6860 6861 CurBasePointers.clear(); 6862 CurPointers.clear(); 6863 CurSizes.clear(); 6864 CurMapTypes.clear(); 6865 6866 // VLA sizes are passed to the outlined region by copy and do not have map 6867 // information associated. 6868 if (CI->capturesVariableArrayType()) { 6869 CurBasePointers.push_back(*CV); 6870 CurPointers.push_back(*CV); 6871 CurSizes.push_back(CGF.getTypeSize(RI->getType())); 6872 // Copy to the device as an argument. No need to retrieve it. 6873 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL | 6874 MappableExprsHandler::OMP_MAP_FIRST_REF); 6875 } else { 6876 // If we have any information in the map clause, we use it, otherwise we 6877 // just do a default mapping. 
6878 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, 6879 CurSizes, CurMapTypes); 6880 if (CurBasePointers.empty()) 6881 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, 6882 CurPointers, CurSizes, CurMapTypes); 6883 } 6884 // We expect to have at least an element of information for this capture. 6885 assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!"); 6886 assert(CurBasePointers.size() == CurPointers.size() && 6887 CurBasePointers.size() == CurSizes.size() && 6888 CurBasePointers.size() == CurMapTypes.size() && 6889 "Inconsistent map information sizes!"); 6890 6891 // The kernel args are always the first elements of the base pointers 6892 // associated with a capture. 6893 KernelArgs.push_back(*CurBasePointers.front()); 6894 // We need to append the results of this capture to what we already have. 6895 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 6896 Pointers.append(CurPointers.begin(), CurPointers.end()); 6897 Sizes.append(CurSizes.begin(), CurSizes.end()); 6898 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); 6899 } 6900 6901 // Keep track on whether the host function has to be executed. 6902 auto OffloadErrorQType = 6903 Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true); 6904 auto OffloadError = CGF.MakeAddrLValue( 6905 CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"), 6906 OffloadErrorQType); 6907 CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), 6908 OffloadError); 6909 6910 // Fill up the pointer arrays and transfer execution to the device. 6911 auto &&ThenGen = [&BasePointers, &Pointers, &Sizes, &MapTypes, Device, 6912 OutlinedFnID, OffloadError, 6913 &D](CodeGenFunction &CGF, PrePostActionTy &) { 6914 auto &RT = CGF.CGM.getOpenMPRuntime(); 6915 // Emit the offloading arrays. 
6916 TargetDataInfo Info; 6917 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 6918 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 6919 Info.PointersArray, Info.SizesArray, 6920 Info.MapTypesArray, Info); 6921 6922 // On top of the arrays that were filled up, the target offloading call 6923 // takes as arguments the device id as well as the host pointer. The host 6924 // pointer is used by the runtime library to identify the current target 6925 // region, so it only has to be unique and not necessarily point to 6926 // anything. It could be the pointer to the outlined function that 6927 // implements the target region, but we aren't using that so that the 6928 // compiler doesn't need to keep that, and could therefore inline the host 6929 // function if proven worthwhile during optimization. 6930 6931 // From this point on, we need to have an ID of the target region defined. 6932 assert(OutlinedFnID && "Invalid outlined function ID!"); 6933 6934 // Emit device ID if any. 6935 llvm::Value *DeviceID; 6936 if (Device) 6937 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 6938 CGF.Int32Ty, /*isSigned=*/true); 6939 else 6940 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 6941 6942 // Emit the number of elements in the offloading arrays. 6943 llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size()); 6944 6945 // Return value of the runtime offloading call. 6946 llvm::Value *Return; 6947 6948 auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D); 6949 auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D); 6950 6951 // The target region is an outlined function launched by the runtime 6952 // via calls __tgt_target() or __tgt_target_teams(). 6953 // 6954 // __tgt_target() launches a target region with one team and one thread, 6955 // executing a serial region. 
This master thread may in turn launch 6956 // more threads within its team upon encountering a parallel region, 6957 // however, no additional teams can be launched on the device. 6958 // 6959 // __tgt_target_teams() launches a target region with one or more teams, 6960 // each with one or more threads. This call is required for target 6961 // constructs such as: 6962 // 'target teams' 6963 // 'target' / 'teams' 6964 // 'target teams distribute parallel for' 6965 // 'target parallel' 6966 // and so on. 6967 // 6968 // Note that on the host and CPU targets, the runtime implementation of 6969 // these calls simply call the outlined function without forking threads. 6970 // The outlined functions themselves have runtime calls to 6971 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 6972 // the compiler in emitTeamsCall() and emitParallelCall(). 6973 // 6974 // In contrast, on the NVPTX target, the implementation of 6975 // __tgt_target_teams() launches a GPU kernel with the requested number 6976 // of teams and threads so no additional calls to the runtime are required. 6977 if (NumTeams) { 6978 // If we have NumTeams defined this means that we have an enclosed teams 6979 // region. Therefore we also expect to have NumThreads defined. These two 6980 // values should be defined in the presence of a teams directive, 6981 // regardless of having any clauses associated. If the user is using teams 6982 // but no clauses, these two values will be the default that should be 6983 // passed to the runtime library - a 32-bit integer with the value zero. 
6984 assert(NumThreads && "Thread limit expression should be available along " 6985 "with number of teams."); 6986 llvm::Value *OffloadingArgs[] = { 6987 DeviceID, OutlinedFnID, 6988 PointerNum, Info.BasePointersArray, 6989 Info.PointersArray, Info.SizesArray, 6990 Info.MapTypesArray, NumTeams, 6991 NumThreads}; 6992 Return = CGF.EmitRuntimeCall( 6993 RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); 6994 } else { 6995 llvm::Value *OffloadingArgs[] = { 6996 DeviceID, OutlinedFnID, 6997 PointerNum, Info.BasePointersArray, 6998 Info.PointersArray, Info.SizesArray, 6999 Info.MapTypesArray}; 7000 Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target), 7001 OffloadingArgs); 7002 } 7003 7004 CGF.EmitStoreOfScalar(Return, OffloadError); 7005 }; 7006 7007 // Notify that the host version must be executed. 7008 auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) { 7009 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u), 7010 OffloadError); 7011 }; 7012 7013 // If we have a target function ID it means that we need to support 7014 // offloading, otherwise, just execute on the host. We need to execute on host 7015 // regardless of the conditional in the if clause if, e.g., the user do not 7016 // specify target triples. 7017 if (OutlinedFnID) { 7018 if (IfCond) 7019 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 7020 else { 7021 RegionCodeGenTy ThenRCG(ThenGen); 7022 ThenRCG(CGF); 7023 } 7024 } else { 7025 RegionCodeGenTy ElseRCG(ElseGen); 7026 ElseRCG(CGF); 7027 } 7028 7029 // Check the error code and execute the host version if required. 
7030 auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed"); 7031 auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont"); 7032 auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation()); 7033 auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal); 7034 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 7035 7036 CGF.EmitBlock(OffloadFailedBlock); 7037 CGF.Builder.CreateCall(OutlinedFn, KernelArgs); 7038 CGF.EmitBranch(OffloadContBlock); 7039 7040 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 7041 } 7042 7043 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 7044 StringRef ParentName) { 7045 if (!S) 7046 return; 7047 7048 // Codegen OMP target directives that offload compute to the device. 7049 bool requiresDeviceCodegen = 7050 isa<OMPExecutableDirective>(S) && 7051 isOpenMPTargetExecutionDirective( 7052 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 7053 7054 if (requiresDeviceCodegen) { 7055 auto &E = *cast<OMPExecutableDirective>(S); 7056 unsigned DeviceID; 7057 unsigned FileID; 7058 unsigned Line; 7059 getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID, 7060 FileID, Line); 7061 7062 // Is this a target region that should not be emitted as an entry point? If 7063 // so just signal we are done with this target region. 
7064 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 7065 ParentName, Line)) 7066 return; 7067 7068 switch (S->getStmtClass()) { 7069 case Stmt::OMPTargetDirectiveClass: 7070 CodeGenFunction::EmitOMPTargetDeviceFunction( 7071 CGM, ParentName, cast<OMPTargetDirective>(*S)); 7072 break; 7073 case Stmt::OMPTargetParallelDirectiveClass: 7074 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 7075 CGM, ParentName, cast<OMPTargetParallelDirective>(*S)); 7076 break; 7077 case Stmt::OMPTargetTeamsDirectiveClass: 7078 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 7079 CGM, ParentName, cast<OMPTargetTeamsDirective>(*S)); 7080 break; 7081 default: 7082 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 7083 } 7084 return; 7085 } 7086 7087 if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) { 7088 if (!E->hasAssociatedStmt()) 7089 return; 7090 7091 scanForTargetRegionsFunctions( 7092 cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(), 7093 ParentName); 7094 return; 7095 } 7096 7097 // If this is a lambda function, look into its body. 7098 if (auto *L = dyn_cast<LambdaExpr>(S)) 7099 S = L->getBody(); 7100 7101 // Keep looking for target regions recursively. 7102 for (auto *II : S->children()) 7103 scanForTargetRegionsFunctions(II, ParentName); 7104 } 7105 7106 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 7107 auto &FD = *cast<FunctionDecl>(GD.getDecl()); 7108 7109 // If emitting code for the host, we do not process FD here. Instead we do 7110 // the normal code generation. 7111 if (!CGM.getLangOpts().OpenMPIsDevice) 7112 return false; 7113 7114 // Try to detect target regions in the function. 7115 scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD)); 7116 7117 // We should not emit any function other that the ones created during the 7118 // scanning. Therefore, we signal that this function is completely dealt 7119 // with. 
7120 return true; 7121 } 7122 7123 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 7124 if (!CGM.getLangOpts().OpenMPIsDevice) 7125 return false; 7126 7127 // Check if there are Ctors/Dtors in this declaration and look for target 7128 // regions in it. We use the complete variant to produce the kernel name 7129 // mangling. 7130 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 7131 if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 7132 for (auto *Ctor : RD->ctors()) { 7133 StringRef ParentName = 7134 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 7135 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 7136 } 7137 auto *Dtor = RD->getDestructor(); 7138 if (Dtor) { 7139 StringRef ParentName = 7140 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 7141 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 7142 } 7143 } 7144 7145 // If we are in target mode, we do not emit any global (declare target is not 7146 // implemented yet). Therefore we signal that GD was processed in this case. 7147 return true; 7148 } 7149 7150 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 7151 auto *VD = GD.getDecl(); 7152 if (isa<FunctionDecl>(VD)) 7153 return emitTargetFunctions(GD); 7154 7155 return emitTargetGlobalVariable(GD); 7156 } 7157 7158 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { 7159 // If we have offloading in the current module, we need to emit the entries 7160 // now and register the offloading descriptor. 7161 createOffloadEntriesAndInfoMetadata(); 7162 7163 // Create and register the offloading binary descriptors. This is the main 7164 // entity that captures all the information about offloading in the current 7165 // compilation unit. 
7166 return createOffloadingBinaryDescriptorRegistration(); 7167 } 7168 7169 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 7170 const OMPExecutableDirective &D, 7171 SourceLocation Loc, 7172 llvm::Value *OutlinedFn, 7173 ArrayRef<llvm::Value *> CapturedVars) { 7174 if (!CGF.HaveInsertPoint()) 7175 return; 7176 7177 auto *RTLoc = emitUpdateLocation(CGF, Loc); 7178 CodeGenFunction::RunCleanupsScope Scope(CGF); 7179 7180 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 7181 llvm::Value *Args[] = { 7182 RTLoc, 7183 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 7184 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 7185 llvm::SmallVector<llvm::Value *, 16> RealArgs; 7186 RealArgs.append(std::begin(Args), std::end(Args)); 7187 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 7188 7189 auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 7190 CGF.EmitRuntimeCall(RTLFn, RealArgs); 7191 } 7192 7193 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 7194 const Expr *NumTeams, 7195 const Expr *ThreadLimit, 7196 SourceLocation Loc) { 7197 if (!CGF.HaveInsertPoint()) 7198 return; 7199 7200 auto *RTLoc = emitUpdateLocation(CGF, Loc); 7201 7202 llvm::Value *NumTeamsVal = 7203 (NumTeams) 7204 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 7205 CGF.CGM.Int32Ty, /* isSigned = */ true) 7206 : CGF.Builder.getInt32(0); 7207 7208 llvm::Value *ThreadLimitVal = 7209 (ThreadLimit) 7210 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 7211 CGF.CGM.Int32Ty, /* isSigned = */ true) 7212 : CGF.Builder.getInt32(0); 7213 7214 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 7215 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 7216 ThreadLimitVal}; 7217 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 7218 PushNumTeamsArgs); 7219 } 7220 7221 void CGOpenMPRuntime::emitTargetDataCalls( 7222 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 7223 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 7224 if (!CGF.HaveInsertPoint()) 7225 return; 7226 7227 // Action used to replace the default codegen action and turn privatization 7228 // off. 7229 PrePostActionTy NoPrivAction; 7230 7231 // Generate the code for the opening of the data environment. Capture all the 7232 // arguments of the runtime call by reference because they are used in the 7233 // closing of the region. 7234 auto &&BeginThenGen = [&D, Device, &Info, &CodeGen](CodeGenFunction &CGF, 7235 PrePostActionTy &) { 7236 // Fill up the arrays with all the mapped variables. 7237 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 7238 MappableExprsHandler::MapValuesArrayTy Pointers; 7239 MappableExprsHandler::MapValuesArrayTy Sizes; 7240 MappableExprsHandler::MapFlagsArrayTy MapTypes; 7241 7242 // Get map clause information. 7243 MappableExprsHandler MCHandler(D, CGF); 7244 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 7245 7246 // Fill up the arrays and create the arguments. 
7247 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 7248 7249 llvm::Value *BasePointersArrayArg = nullptr; 7250 llvm::Value *PointersArrayArg = nullptr; 7251 llvm::Value *SizesArrayArg = nullptr; 7252 llvm::Value *MapTypesArrayArg = nullptr; 7253 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 7254 SizesArrayArg, MapTypesArrayArg, Info); 7255 7256 // Emit device ID if any. 7257 llvm::Value *DeviceID = nullptr; 7258 if (Device) 7259 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 7260 CGF.Int32Ty, /*isSigned=*/true); 7261 else 7262 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 7263 7264 // Emit the number of elements in the offloading arrays. 7265 auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 7266 7267 llvm::Value *OffloadingArgs[] = { 7268 DeviceID, PointerNum, BasePointersArrayArg, 7269 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 7270 auto &RT = CGF.CGM.getOpenMPRuntime(); 7271 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin), 7272 OffloadingArgs); 7273 7274 // If device pointer privatization is required, emit the body of the region 7275 // here. It will have to be duplicated: with and without privatization. 7276 if (!Info.CaptureDeviceAddrMap.empty()) 7277 CodeGen(CGF); 7278 }; 7279 7280 // Generate code for the closing of the data region. 7281 auto &&EndThenGen = [Device, &Info](CodeGenFunction &CGF, PrePostActionTy &) { 7282 assert(Info.isValid() && "Invalid data environment closing arguments."); 7283 7284 llvm::Value *BasePointersArrayArg = nullptr; 7285 llvm::Value *PointersArrayArg = nullptr; 7286 llvm::Value *SizesArrayArg = nullptr; 7287 llvm::Value *MapTypesArrayArg = nullptr; 7288 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 7289 SizesArrayArg, MapTypesArrayArg, Info); 7290 7291 // Emit device ID if any. 
7292 llvm::Value *DeviceID = nullptr; 7293 if (Device) 7294 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 7295 CGF.Int32Ty, /*isSigned=*/true); 7296 else 7297 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 7298 7299 // Emit the number of elements in the offloading arrays. 7300 auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 7301 7302 llvm::Value *OffloadingArgs[] = { 7303 DeviceID, PointerNum, BasePointersArrayArg, 7304 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 7305 auto &RT = CGF.CGM.getOpenMPRuntime(); 7306 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end), 7307 OffloadingArgs); 7308 }; 7309 7310 // If we need device pointer privatization, we need to emit the body of the 7311 // region with no privatization in the 'else' branch of the conditional. 7312 // Otherwise, we don't have to do anything. 7313 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 7314 PrePostActionTy &) { 7315 if (!Info.CaptureDeviceAddrMap.empty()) { 7316 CodeGen.setAction(NoPrivAction); 7317 CodeGen(CGF); 7318 } 7319 }; 7320 7321 // We don't have to do anything to close the region if the if clause evaluates 7322 // to false. 7323 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 7324 7325 if (IfCond) { 7326 emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 7327 } else { 7328 RegionCodeGenTy RCG(BeginThenGen); 7329 RCG(CGF); 7330 } 7331 7332 // If we don't require privatization of device pointers, we emit the body in 7333 // between the runtime calls. This avoids duplicating the body code. 
7334 if (Info.CaptureDeviceAddrMap.empty()) { 7335 CodeGen.setAction(NoPrivAction); 7336 CodeGen(CGF); 7337 } 7338 7339 if (IfCond) { 7340 emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen); 7341 } else { 7342 RegionCodeGenTy RCG(EndThenGen); 7343 RCG(CGF); 7344 } 7345 } 7346 7347 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 7348 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 7349 const Expr *Device) { 7350 if (!CGF.HaveInsertPoint()) 7351 return; 7352 7353 assert((isa<OMPTargetEnterDataDirective>(D) || 7354 isa<OMPTargetExitDataDirective>(D) || 7355 isa<OMPTargetUpdateDirective>(D)) && 7356 "Expecting either target enter, exit data, or update directives."); 7357 7358 // Generate the code for the opening of the data environment. 7359 auto &&ThenGen = [&D, Device](CodeGenFunction &CGF, PrePostActionTy &) { 7360 // Fill up the arrays with all the mapped variables. 7361 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 7362 MappableExprsHandler::MapValuesArrayTy Pointers; 7363 MappableExprsHandler::MapValuesArrayTy Sizes; 7364 MappableExprsHandler::MapFlagsArrayTy MapTypes; 7365 7366 // Get map clause information. 7367 MappableExprsHandler MEHandler(D, CGF); 7368 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 7369 7370 // Fill up the arrays and create the arguments. 7371 TargetDataInfo Info; 7372 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 7373 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 7374 Info.PointersArray, Info.SizesArray, 7375 Info.MapTypesArray, Info); 7376 7377 // Emit device ID if any. 7378 llvm::Value *DeviceID = nullptr; 7379 if (Device) 7380 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 7381 CGF.Int32Ty, /*isSigned=*/true); 7382 else 7383 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 7384 7385 // Emit the number of elements in the offloading arrays. 
7386 auto *PointerNum = CGF.Builder.getInt32(BasePointers.size()); 7387 7388 llvm::Value *OffloadingArgs[] = { 7389 DeviceID, PointerNum, Info.BasePointersArray, 7390 Info.PointersArray, Info.SizesArray, Info.MapTypesArray}; 7391 7392 auto &RT = CGF.CGM.getOpenMPRuntime(); 7393 // Select the right runtime function call for each expected standalone 7394 // directive. 7395 OpenMPRTLFunction RTLFn; 7396 switch (D.getDirectiveKind()) { 7397 default: 7398 llvm_unreachable("Unexpected standalone target data directive."); 7399 break; 7400 case OMPD_target_enter_data: 7401 RTLFn = OMPRTL__tgt_target_data_begin; 7402 break; 7403 case OMPD_target_exit_data: 7404 RTLFn = OMPRTL__tgt_target_data_end; 7405 break; 7406 case OMPD_target_update: 7407 RTLFn = OMPRTL__tgt_target_data_update; 7408 break; 7409 } 7410 CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs); 7411 }; 7412 7413 // In the event we get an if clause, we don't have to take any action on the 7414 // else side. 7415 auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 7416 7417 if (IfCond) { 7418 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 7419 } else { 7420 RegionCodeGenTy ThenGenRCG(ThenGen); 7421 ThenGenRCG(CGF); 7422 } 7423 } 7424 7425 namespace { 7426 /// Kind of parameter in a function with 'declare simd' directive. 7427 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 7428 /// Attribute set of the parameter. 7429 struct ParamAttrTy { 7430 ParamKindTy Kind = Vector; 7431 llvm::APSInt StrideOrArg; 7432 llvm::APSInt Alignment; 7433 }; 7434 } // namespace 7435 7436 static unsigned evaluateCDTSize(const FunctionDecl *FD, 7437 ArrayRef<ParamAttrTy> ParamAttrs) { 7438 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 7439 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 7440 // of that clause. The VLEN value must be power of 2. 
7441 // In other case the notion of the function`s "characteristic data type" (CDT) 7442 // is used to compute the vector length. 7443 // CDT is defined in the following order: 7444 // a) For non-void function, the CDT is the return type. 7445 // b) If the function has any non-uniform, non-linear parameters, then the 7446 // CDT is the type of the first such parameter. 7447 // c) If the CDT determined by a) or b) above is struct, union, or class 7448 // type which is pass-by-value (except for the type that maps to the 7449 // built-in complex data type), the characteristic data type is int. 7450 // d) If none of the above three cases is applicable, the CDT is int. 7451 // The VLEN is then determined based on the CDT and the size of vector 7452 // register of that ISA for which current vector version is generated. The 7453 // VLEN is computed using the formula below: 7454 // VLEN = sizeof(vector_register) / sizeof(CDT), 7455 // where vector register size specified in section 3.2.1 Registers and the 7456 // Stack Frame of original AMD64 ABI document. 
7457 QualType RetType = FD->getReturnType(); 7458 if (RetType.isNull()) 7459 return 0; 7460 ASTContext &C = FD->getASTContext(); 7461 QualType CDT; 7462 if (!RetType.isNull() && !RetType->isVoidType()) 7463 CDT = RetType; 7464 else { 7465 unsigned Offset = 0; 7466 if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 7467 if (ParamAttrs[Offset].Kind == Vector) 7468 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 7469 ++Offset; 7470 } 7471 if (CDT.isNull()) { 7472 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 7473 if (ParamAttrs[I + Offset].Kind == Vector) { 7474 CDT = FD->getParamDecl(I)->getType(); 7475 break; 7476 } 7477 } 7478 } 7479 } 7480 if (CDT.isNull()) 7481 CDT = C.IntTy; 7482 CDT = CDT->getCanonicalTypeUnqualified(); 7483 if (CDT->isRecordType() || CDT->isUnionType()) 7484 CDT = C.IntTy; 7485 return C.getTypeSize(CDT); 7486 } 7487 7488 static void 7489 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 7490 const llvm::APSInt &VLENVal, 7491 ArrayRef<ParamAttrTy> ParamAttrs, 7492 OMPDeclareSimdDeclAttr::BranchStateTy State) { 7493 struct ISADataTy { 7494 char ISA; 7495 unsigned VecRegSize; 7496 }; 7497 ISADataTy ISAData[] = { 7498 { 7499 'b', 128 7500 }, // SSE 7501 { 7502 'c', 256 7503 }, // AVX 7504 { 7505 'd', 256 7506 }, // AVX2 7507 { 7508 'e', 512 7509 }, // AVX512 7510 }; 7511 llvm::SmallVector<char, 2> Masked; 7512 switch (State) { 7513 case OMPDeclareSimdDeclAttr::BS_Undefined: 7514 Masked.push_back('N'); 7515 Masked.push_back('M'); 7516 break; 7517 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 7518 Masked.push_back('N'); 7519 break; 7520 case OMPDeclareSimdDeclAttr::BS_Inbranch: 7521 Masked.push_back('M'); 7522 break; 7523 } 7524 for (auto Mask : Masked) { 7525 for (auto &Data : ISAData) { 7526 SmallString<256> Buffer; 7527 llvm::raw_svector_ostream Out(Buffer); 7528 Out << "_ZGV" << Data.ISA << Mask; 7529 if (!VLENVal) { 7530 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / 7531 evaluateCDTSize(FD, 
ParamAttrs)); 7532 } else 7533 Out << VLENVal; 7534 for (auto &ParamAttr : ParamAttrs) { 7535 switch (ParamAttr.Kind){ 7536 case LinearWithVarStride: 7537 Out << 's' << ParamAttr.StrideOrArg; 7538 break; 7539 case Linear: 7540 Out << 'l'; 7541 if (!!ParamAttr.StrideOrArg) 7542 Out << ParamAttr.StrideOrArg; 7543 break; 7544 case Uniform: 7545 Out << 'u'; 7546 break; 7547 case Vector: 7548 Out << 'v'; 7549 break; 7550 } 7551 if (!!ParamAttr.Alignment) 7552 Out << 'a' << ParamAttr.Alignment; 7553 } 7554 Out << '_' << Fn->getName(); 7555 Fn->addFnAttr(Out.str()); 7556 } 7557 } 7558 } 7559 7560 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 7561 llvm::Function *Fn) { 7562 ASTContext &C = CGM.getContext(); 7563 FD = FD->getCanonicalDecl(); 7564 // Map params to their positions in function decl. 7565 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 7566 if (isa<CXXMethodDecl>(FD)) 7567 ParamPositions.insert({FD, 0}); 7568 unsigned ParamPos = ParamPositions.size(); 7569 for (auto *P : FD->parameters()) { 7570 ParamPositions.insert({P->getCanonicalDecl(), ParamPos}); 7571 ++ParamPos; 7572 } 7573 for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 7574 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 7575 // Mark uniform parameters. 7576 for (auto *E : Attr->uniforms()) { 7577 E = E->IgnoreParenImpCasts(); 7578 unsigned Pos; 7579 if (isa<CXXThisExpr>(E)) 7580 Pos = ParamPositions[FD]; 7581 else { 7582 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 7583 ->getCanonicalDecl(); 7584 Pos = ParamPositions[PVD]; 7585 } 7586 ParamAttrs[Pos].Kind = Uniform; 7587 } 7588 // Get alignment info. 
7589 auto NI = Attr->alignments_begin(); 7590 for (auto *E : Attr->aligneds()) { 7591 E = E->IgnoreParenImpCasts(); 7592 unsigned Pos; 7593 QualType ParmTy; 7594 if (isa<CXXThisExpr>(E)) { 7595 Pos = ParamPositions[FD]; 7596 ParmTy = E->getType(); 7597 } else { 7598 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 7599 ->getCanonicalDecl(); 7600 Pos = ParamPositions[PVD]; 7601 ParmTy = PVD->getType(); 7602 } 7603 ParamAttrs[Pos].Alignment = 7604 (*NI) ? (*NI)->EvaluateKnownConstInt(C) 7605 : llvm::APSInt::getUnsigned( 7606 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 7607 .getQuantity()); 7608 ++NI; 7609 } 7610 // Mark linear parameters. 7611 auto SI = Attr->steps_begin(); 7612 auto MI = Attr->modifiers_begin(); 7613 for (auto *E : Attr->linears()) { 7614 E = E->IgnoreParenImpCasts(); 7615 unsigned Pos; 7616 if (isa<CXXThisExpr>(E)) 7617 Pos = ParamPositions[FD]; 7618 else { 7619 auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 7620 ->getCanonicalDecl(); 7621 Pos = ParamPositions[PVD]; 7622 } 7623 auto &ParamAttr = ParamAttrs[Pos]; 7624 ParamAttr.Kind = Linear; 7625 if (*SI) { 7626 if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C, 7627 Expr::SE_AllowSideEffects)) { 7628 if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 7629 if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 7630 ParamAttr.Kind = LinearWithVarStride; 7631 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 7632 ParamPositions[StridePVD->getCanonicalDecl()]); 7633 } 7634 } 7635 } 7636 } 7637 ++SI; 7638 ++MI; 7639 } 7640 llvm::APSInt VLENVal; 7641 if (const Expr *VLEN = Attr->getSimdlen()) 7642 VLENVal = VLEN->EvaluateKnownConstInt(C); 7643 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 7644 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 7645 CGM.getTriple().getArch() == llvm::Triple::x86_64) 7646 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 7647 } 7648 } 7649 7650 namespace { 7651 /// 
Cleanup action for doacross support. 7652 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 7653 public: 7654 static const int DoacrossFinArgs = 2; 7655 7656 private: 7657 llvm::Value *RTLFn; 7658 llvm::Value *Args[DoacrossFinArgs]; 7659 7660 public: 7661 DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs) 7662 : RTLFn(RTLFn) { 7663 assert(CallArgs.size() == DoacrossFinArgs); 7664 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 7665 } 7666 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 7667 if (!CGF.HaveInsertPoint()) 7668 return; 7669 CGF.EmitRuntimeCall(RTLFn, Args); 7670 } 7671 }; 7672 } // namespace 7673 7674 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 7675 const OMPLoopDirective &D) { 7676 if (!CGF.HaveInsertPoint()) 7677 return; 7678 7679 ASTContext &C = CGM.getContext(); 7680 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 7681 RecordDecl *RD; 7682 if (KmpDimTy.isNull()) { 7683 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 7684 // kmp_int64 lo; // lower 7685 // kmp_int64 up; // upper 7686 // kmp_int64 st; // stride 7687 // }; 7688 RD = C.buildImplicitRecord("kmp_dim"); 7689 RD->startDefinition(); 7690 addFieldToRecordDecl(C, RD, Int64Ty); 7691 addFieldToRecordDecl(C, RD, Int64Ty); 7692 addFieldToRecordDecl(C, RD, Int64Ty); 7693 RD->completeDefinition(); 7694 KmpDimTy = C.getRecordType(RD); 7695 } else 7696 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 7697 7698 Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims"); 7699 CGF.EmitNullInitialization(DimsAddr, KmpDimTy); 7700 enum { LowerFD = 0, UpperFD, StrideFD }; 7701 // Fill dims with data. 
7702 LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy); 7703 // dims.upper = num_iterations; 7704 LValue UpperLVal = 7705 CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD)); 7706 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 7707 CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(), 7708 Int64Ty, D.getNumIterations()->getExprLoc()); 7709 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 7710 // dims.stride = 1; 7711 LValue StrideLVal = 7712 CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD)); 7713 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 7714 StrideLVal); 7715 7716 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 7717 // kmp_int32 num_dims, struct kmp_dim * dims); 7718 llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()), 7719 getThreadID(CGF, D.getLocStart()), 7720 llvm::ConstantInt::getSigned(CGM.Int32Ty, 1), 7721 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 7722 DimsAddr.getPointer(), CGM.VoidPtrTy)}; 7723 7724 llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init); 7725 CGF.EmitRuntimeCall(RTLFn, Args); 7726 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 7727 emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())}; 7728 llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 7729 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 7730 llvm::makeArrayRef(FiniArgs)); 7731 } 7732 7733 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 7734 const OMPDependClause *C) { 7735 QualType Int64Ty = 7736 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 7737 const Expr *CounterVal = C->getCounterValue(); 7738 assert(CounterVal); 7739 llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal), 7740 CounterVal->getType(), Int64Ty, 7741 CounterVal->getExprLoc()); 7742 Address CntAddr = 
CGF.CreateMemTemp(Int64Ty, ".cnt.addr"); 7743 CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty); 7744 llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()), 7745 getThreadID(CGF, C->getLocStart()), 7746 CntAddr.getPointer()}; 7747 llvm::Value *RTLFn; 7748 if (C->getDependencyKind() == OMPC_DEPEND_source) 7749 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 7750 else { 7751 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 7752 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 7753 } 7754 CGF.EmitRuntimeCall(RTLFn, Args); 7755 } 7756 7757