//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;

namespace {
/// \brief Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// \brief Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// \brief Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// \brief Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// \brief Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// \brief Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// \brief Constructor for regions that outline the given captured statement.
  /// \param CodeGen Callback used to emit the body of the region.
  /// \param Kind Directive kind the region was created for.
  /// \param HasCancel true if the region may contain a 'cancel' directive.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// \brief Constructor for regions with no captured statement of their own
  /// (used by inlined regions, which delegate to an enclosing region).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// \brief Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// \brief Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// \brief Emit the dispatch point for an untied task part. Default is a
  /// no-op; regions that support untied tasks override this.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// \brief API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// \brief A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
};

/// \brief API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \brief Pre/post action that emits the part-dispatch machinery (a switch
  /// over the part id) required for 'untied' tasks, which may be resumed at
  /// task scheduling points.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation());
        auto *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        // Part 0 is the entry point: execution starts here on first schedule.
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        // Store the id of the next part, run the user-provided callback
        // (re-enqueues the task), then return; the new switch case resumes
        // execution at this point when the task is next scheduled.
        auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// \brief Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// \brief A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// \brief API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // \brief Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// \brief Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// \brief CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// \brief OldCSI downcast to a CGOpenMPRegionInfo, or null if the outer
  /// captured-statement info is not an OpenMP region.
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// \brief API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// \brief This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

/// \brief Placeholder codegen callback for regions that must never emit a
/// body (used by CGOpenMPInnerExprInfo below).
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// \brief API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      SourceLocation());
      PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
        return CGF.EmitLValue(&DRE).getAddress();
      });
    }
    (void)PrivScope.Privatize();
  }

  /// \brief Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// \brief Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// \brief RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;

public:
  /// \brief Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    // Stash the lambda-capture maps so the inlined region does not resolve
    // captures through an enclosing lambda; restored in the destructor.
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
  }
};

/// \brief Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
enum OpenMPLocationFlags {
  /// \brief Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// \brief Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// \brief Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// \brief Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// \brief Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// \brief Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL, mirroring kmp.h).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// \brief Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// \brief Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
};

/// \brief Describes ident structure that describes a source location.
/// All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// \brief might be used in Fortran
  IdentField_Reserved_1,
  /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// \brief Not really used in Fortran any more
  IdentField_Reserved_2,
  /// \brief Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// \brief String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// \brief dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
};

/// \brief Identifiers for the OpenMP runtime entry points this module may
/// emit calls to. Each enumerator's comment gives the C prototype of the
/// corresponding runtime function.
enum OpenMPRTLFunction {
  /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// \brief Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,

  //
  // Offloading related calls
  //
  // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
  // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_end,
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
651 class CleanupTy final : public EHScopeStack::Cleanup { 652 PrePostActionTy *Action; 653 654 public: 655 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 656 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 657 if (!CGF.HaveInsertPoint()) 658 return; 659 Action->Exit(CGF); 660 } 661 }; 662 663 } // anonymous namespace 664 665 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 666 CodeGenFunction::RunCleanupsScope Scope(CGF); 667 if (PrePostAction) { 668 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 669 Callback(CodeGen, CGF, *PrePostAction); 670 } else { 671 PrePostActionTy Action; 672 Callback(CodeGen, CGF, Action); 673 } 674 } 675 676 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 677 return CGF.EmitLoadOfPointerLValue( 678 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 679 getThreadIDVariable()->getType()->castAs<PointerType>()); 680 } 681 682 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 683 if (!CGF.HaveInsertPoint()) 684 return; 685 // 1.2.2 OpenMP Language Terminology 686 // Structured block - An executable statement with a single entry at the 687 // top and a single exit at the bottom. 688 // The point of exit cannot be a branch out of the structured block. 689 // longjmp() and throw() must not violate the entry/exit criteria. 
690 CGF.EHStack.pushTerminate(); 691 CodeGen(CGF); 692 CGF.EHStack.popTerminate(); 693 } 694 695 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 696 CodeGenFunction &CGF) { 697 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 698 getThreadIDVariable()->getType(), 699 AlignmentSource::Decl); 700 } 701 702 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) 703 : CGM(CGM), OffloadEntriesInfoManager(CGM) { 704 IdentTy = llvm::StructType::create( 705 "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, 706 CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, 707 CGM.Int8PtrTy /* psource */, nullptr); 708 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 709 710 loadOffloadInfoMetadata(); 711 } 712 713 void CGOpenMPRuntime::clear() { 714 InternalVars.clear(); 715 } 716 717 static llvm::Function * 718 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 719 const Expr *CombinerInitializer, const VarDecl *In, 720 const VarDecl *Out, bool IsCombiner) { 721 // void .omp_combiner.(Ty *in, Ty *out); 722 auto &C = CGM.getContext(); 723 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 724 FunctionArgList Args; 725 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 726 /*Id=*/nullptr, PtrTy); 727 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 728 /*Id=*/nullptr, PtrTy); 729 Args.push_back(&OmpOutParm); 730 Args.push_back(&OmpInParm); 731 auto &FnInfo = 732 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 733 auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 734 auto *Fn = llvm::Function::Create( 735 FnTy, llvm::GlobalValue::InternalLinkage, 736 IsCombiner ? ".omp_combiner." 
: ".omp_initializer.", &CGM.getModule()); 737 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); 738 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 739 CodeGenFunction CGF(CGM); 740 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 741 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 742 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); 743 CodeGenFunction::OMPPrivateScope Scope(CGF); 744 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 745 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address { 746 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 747 .getAddress(); 748 }); 749 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 750 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address { 751 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 752 .getAddress(); 753 }); 754 (void)Scope.Privatize(); 755 CGF.EmitIgnoredExpr(CombinerInitializer); 756 Scope.ForceCleanup(); 757 CGF.FinishFunction(); 758 return Fn; 759 } 760 761 void CGOpenMPRuntime::emitUserDefinedReduction( 762 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 763 if (UDRMap.count(D) > 0) 764 return; 765 auto &C = CGM.getContext(); 766 if (!In || !Out) { 767 In = &C.Idents.get("omp_in"); 768 Out = &C.Idents.get("omp_out"); 769 } 770 llvm::Function *Combiner = emitCombinerOrInitializer( 771 CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()), 772 cast<VarDecl>(D->lookup(Out).front()), 773 /*IsCombiner=*/true); 774 llvm::Function *Initializer = nullptr; 775 if (auto *Init = D->getInitializer()) { 776 if (!Priv || !Orig) { 777 Priv = &C.Idents.get("omp_priv"); 778 Orig = &C.Idents.get("omp_orig"); 779 } 780 Initializer = emitCombinerOrInitializer( 781 CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()), 782 cast<VarDecl>(D->lookup(Priv).front()), 783 /*IsCombiner=*/false); 784 } 785 UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, 
Initializer))); 786 if (CGF) { 787 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 788 Decls.second.push_back(D); 789 } 790 } 791 792 std::pair<llvm::Function *, llvm::Function *> 793 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 794 auto I = UDRMap.find(D); 795 if (I != UDRMap.end()) 796 return I->second; 797 emitUserDefinedReduction(/*CGF=*/nullptr, D); 798 return UDRMap.lookup(D); 799 } 800 801 // Layout information for ident_t. 802 static CharUnits getIdentAlign(CodeGenModule &CGM) { 803 return CGM.getPointerAlign(); 804 } 805 static CharUnits getIdentSize(CodeGenModule &CGM) { 806 assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign())); 807 return CharUnits::fromQuantity(16) + CGM.getPointerSize(); 808 } 809 static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) { 810 // All the fields except the last are i32, so this works beautifully. 811 return unsigned(Field) * CharUnits::fromQuantity(4); 812 } 813 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr, 814 IdentFieldIndex Field, 815 const llvm::Twine &Name = "") { 816 auto Offset = getOffsetOfIdentField(Field); 817 return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name); 818 } 819 820 llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( 821 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 822 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 823 assert(ThreadIDVar->getType()->isPointerType() && 824 "thread id variable must be of type kmp_int32 *"); 825 const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 826 CodeGenFunction CGF(CGM, true); 827 bool HasCancel = false; 828 if (auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 829 HasCancel = OPD->hasCancel(); 830 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 831 HasCancel = OPSD->hasCancel(); 832 else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 833 HasCancel = OPFD->hasCancel(); 834 
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}

/// Emits the outlined function for a 'task' region. For untied tasks the
/// generated code re-enqueues the task (via __kmpc_omp_task) at each
/// scheduling point; \p NumberOfParts receives the number of task parts in
/// that case.
llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Codegen callback used at untied-task switch points: calls
  // __kmpc_omp_task(loc, gtid, task_t*) to reschedule the current task.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    auto *ThreadID = getThreadID(CGF, D.getLocStart());
    auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
  // Only plain 'task' directives can carry 'cancel'; taskloop etc. cannot.
  auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

/// Returns (creating on first use) the module-level default ident_t object
/// for the given \p Flags. Its psource field points at the canonical
/// ";unknown;unknown;0;0;;" string used when no debug info is requested.
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = getIdentAlign(CGM);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }
    auto DefaultOpenMPLocation = new llvm::GlobalVariable(
        CGM.getModule(), IdentTy, /*isConstant*/ true,
        llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
    DefaultOpenMPLocation->setUnnamedAddr(true);
    DefaultOpenMPLocation->setAlignment(Align.getQuantity());

    // ident_t = { i32 reserved, i32 flags, i32 reserved, i32 reserved,
    //             i8 *psource }.
    llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
    llvm::Constant *Values[] = {Zero,
                                llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                                Zero, Zero, DefaultOpenMPPSource};
    llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
    DefaultOpenMPLocation->setInitializer(Init);
    OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}

/// Builds (or reuses) an ident_t* describing source location \p Loc for
/// passing to OpenMP runtime entry points.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
                                      ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Seed the local ident_t from the default one in the entry block so the
    // copy dominates every later use of the alloca.
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGM.getSize(getIdentSize(CGF.CGM)));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);

  // Cache the ";file;function;line;column;;" string per raw source location.
  auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const FunctionDecl *FD =
            dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
      OS2 << FD->getQualifiedNameAsString();
    }
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.Builder.CreateStore(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}

/// Returns the global thread id (gtid) value for the current function,
/// loading it from the outlined function's thread-id argument when available
/// and otherwise calling __kmpc_global_thread_num; the result is cached
/// per-function in OpenMPLocThreadIDMap.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
      // If value loaded in entry block, cache it and use it everywhere in
      // function.
      if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
        auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
        Elem.second.ThreadID = ThreadID;
      }
      return ThreadID;
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
996 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 997 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 998 ThreadID = 999 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1000 emitUpdateLocation(CGF, Loc)); 1001 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1002 Elem.second.ThreadID = ThreadID; 1003 return ThreadID; 1004 } 1005 1006 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1007 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1008 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 1009 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1010 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1011 for(auto *D : FunctionUDRMap[CGF.CurFn]) { 1012 UDRMap.erase(D); 1013 } 1014 FunctionUDRMap.erase(CGF.CurFn); 1015 } 1016 } 1017 1018 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1019 if (!IdentTy) { 1020 } 1021 return llvm::PointerType::getUnqual(IdentTy); 1022 } 1023 1024 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1025 if (!Kmpc_MicroTy) { 1026 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

/// Returns a declaration of the requested OpenMP runtime entry point,
/// lazily creating the llvm::FunctionType that matches the libomp /
/// libomptarget ABI for that function.
llvm::Constant *
CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
  llvm::Constant *RTLFn = nullptr;
  switch (static_cast<OpenMPRTLFunction>(Function)) {
  case OMPRTL__kmpc_fork_call: {
    // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
    // microtask, ...);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                getKmpc_MicroPointerTy()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
    break;
  }
  case OMPRTL__kmpc_global_thread_num: {
    // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
    break;
  }
  case OMPRTL__kmpc_threadprivate_cached: {
    // Build void *__kmpc_threadprivate_cached(ident_t *loc,
    // kmp_int32 global_tid, void *data, size_t size, void ***cache);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy, CGM.SizeTy,
                                CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
    break;
  }
  case OMPRTL__kmpc_critical: {
    // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
    break;
  }
  case OMPRTL__kmpc_critical_with_hint: {
    // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit, uintptr_t hint);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                llvm::PointerType::getUnqual(KmpCriticalNameTy),
                                CGM.IntPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
    break;
  }
  case OMPRTL__kmpc_threadprivate_register: {
    // Build void __kmpc_threadprivate_register(ident_t *, void *data,
    // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
    // typedef void *(*kmpc_ctor)(void *);
    auto KmpcCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                /*isVarArg*/ false)->getPointerTo();
    // typedef void *(*kmpc_cctor)(void *, void *);
    llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto KmpcCopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
                                /*isVarArg*/ false)->getPointerTo();
    // typedef void (*kmpc_dtor)(void *);
    auto KmpcDtorTy =
        llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
            ->getPointerTo();
    llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
                              KmpcCopyCtorTy, KmpcDtorTy};
    auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
                                        /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
    break;
  }
  case OMPRTL__kmpc_end_critical: {
    // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
    break;
  }
  case OMPRTL__kmpc_cancel_barrier: {
    // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
    break;
  }
  case OMPRTL__kmpc_barrier: {
    // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
    break;
  }
  case OMPRTL__kmpc_for_static_fini: {
    // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
    break;
  }
  case OMPRTL__kmpc_push_num_threads: {
    // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_threads)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
    break;
  }
  case OMPRTL__kmpc_serialized_parallel: {
    // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
    break;
  }
  case OMPRTL__kmpc_end_serialized_parallel: {
    // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
    break;
  }
  case OMPRTL__kmpc_flush: {
    // Build void __kmpc_flush(ident_t *loc);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
    break;
  }
  case OMPRTL__kmpc_master: {
    // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
    break;
  }
  case OMPRTL__kmpc_end_master: {
    // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
    break;
  }
  case OMPRTL__kmpc_omp_taskyield: {
    // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
    // int end_part);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
    break;
  }
  case OMPRTL__kmpc_single: {
    // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
    break;
  }
  case OMPRTL__kmpc_end_single: {
    // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
    break;
  }
  case OMPRTL__kmpc_omp_task_alloc: {
    // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
    // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
    // kmp_routine_entry_t *task_entry);
    assert(KmpRoutineEntryPtrTy != nullptr &&
           "Type kmp_routine_entry_t must be created.");
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
                                CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
    // Return void * and then cast to particular kmp_task_t type.
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
    break;
  }
  case OMPRTL__kmpc_omp_task: {
    // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
    // *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
    break;
  }
  case OMPRTL__kmpc_copyprivate: {
    // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
    // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
    // kmp_int32 didit);
    llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CpyFnTy =
        llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
                                CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
                                CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
    break;
  }
  case OMPRTL__kmpc_reduce: {
    // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
    // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
                                               /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
    break;
  }
  case OMPRTL__kmpc_reduce_nowait: {
    // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
    // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
    // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
    // *lck);
    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
                                               /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
    break;
  }
  case OMPRTL__kmpc_end_reduce: {
    // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
    break;
  }
  case OMPRTL__kmpc_end_reduce_nowait: {
    // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
    break;
  }
  case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
    // *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
    break;
  }
  case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
    // *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy,
                                      /*Name=*/"__kmpc_omp_task_complete_if0");
    break;
  }
  case OMPRTL__kmpc_ordered: {
    // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
    break;
  }
  case OMPRTL__kmpc_end_ordered: {
    // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
    break;
  }
  case OMPRTL__kmpc_omp_taskwait: {
    // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
    break;
  }
  case OMPRTL__kmpc_taskgroup: {
    // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
    break;
  }
  case OMPRTL__kmpc_end_taskgroup: {
    // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
    break;
  }
  case OMPRTL__kmpc_push_proc_bind: {
    // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
    // int proc_bind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
    break;
  }
  case OMPRTL__kmpc_omp_task_with_deps: {
    // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
    // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
        CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
    break;
  }
  case OMPRTL__kmpc_omp_wait_deps: {
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
    // kmp_depend_info_t *noalias_dep_list);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int32Ty, CGM.VoidPtrTy,
                                CGM.Int32Ty, CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
    break;
  }
  case OMPRTL__kmpc_cancellationpoint: {
    // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
    // global_tid, kmp_int32 cncl_kind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
    break;
  }
  case OMPRTL__kmpc_cancel: {
    // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 cncl_kind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
    break;
  }
  case OMPRTL__kmpc_push_num_teams: {
    // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
    // kmp_int32 num_teams, kmp_int32 num_threads)
    // NOTE(review): declared here with an i32 return type although the
    // comment says void — verify against the libomp declaration.
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
                                CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
    break;
  }
  case OMPRTL__kmpc_fork_teams: {
    // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
    // microtask, ...);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                getKmpc_MicroPointerTy()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
    break;
  }
  case OMPRTL__kmpc_taskloop: {
    // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
    // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
    // sched, kmp_uint64 grainsize, void *task_dup);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
                                CGM.IntTy,
                                CGM.VoidPtrTy,
                                CGM.IntTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty,
                                CGM.IntTy,
                                CGM.IntTy,
                                CGM.Int64Ty,
                                CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
    break;
  }
  case OMPRTL__tgt_target: {
    // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
    // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int32Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int32Ty->getPointerTo()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
    break;
  }
  case OMPRTL__tgt_target_teams: {
    // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
    // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
    // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
    llvm::Type *TypeParams[] = {CGM.Int32Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int32Ty->getPointerTo(),
                                CGM.Int32Ty,
                                CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
    break;
  }
  case OMPRTL__tgt_register_lib: {
    // Build void __tgt_register_lib(__tgt_bin_desc *desc);
    // NOTE(review): declared with an i32 return type although the comment
    // says void — verify against the libomptarget declaration.
    QualType ParamTy =
        CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
    llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
    break;
  }
  case OMPRTL__tgt_unregister_lib: {
    // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
    // NOTE(review): declared with an i32 return type although the comment
    // says void — verify against the libomptarget declaration.
    QualType ParamTy =
        CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
    llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
    break;
  }
  case OMPRTL__tgt_target_data_begin: {
    // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
    // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int32Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int32Ty->getPointerTo()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
    break;
  }
  case OMPRTL__tgt_target_data_end: {
    // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
    // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int32Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int32Ty->getPointerTo()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
    break;
  }
  }
  assert(RTLFn && "Unable to find OpenMP runtime function");
  return RTLFn;
}

/// Declares the __kmpc_for_static_init_{4,4u,8,8u} variant matching the
/// induction-variable size and signedness.
llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
                                                             bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                       : "__kmpc_for_static_init_4u")
                           : (IVSigned ? "__kmpc_for_static_init_8"
                                       : "__kmpc_for_static_init_8u");
  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    CGM.Int32Ty,                               // schedtype
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy,                                     // p_stride
    ITy,                                       // incr
    ITy                                        // chunk
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Declares the __kmpc_dispatch_init_{4,4u,8,8u} variant matching the
/// induction-variable size and signedness.
llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  auto ITy = IVSize == 32 ?
      CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// \brief Return the __kmpc_dispatch_fini_{4,4u,8,8u} runtime entry matching
/// \a IVSize/\a IVSigned; called at the end of an ordered dynamic iteration.
llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// \brief Return the __kmpc_dispatch_next_{4,4u,8,8u} runtime entry matching
/// \a IVSize/\a IVSigned; it fetches the next chunk of a dynamic schedule and
/// returns non-zero while chunks remain.
llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  auto ITy = IVSize == 32 ?
      CGM.Int32Ty : CGM.Int64Ty;
  auto PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy                                      // p_stride
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// \brief Return the internal ".cache." global used by
/// __kmpc_threadprivate_cached for the threadprivate variable \a VD.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  // Only used when the TLS-based threadprivate implementation is not active.
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
                                     Twine(CGM.getMangledName(VD)) + ".cache.");
}

/// \brief Return the address of the current thread's copy of the
/// threadprivate variable \a VD (master copy at \a VDAddr).
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  // With native TLS support the variable itself is already thread-local.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  auto VarTy = VDAddr.getElementType();
  // __kmpc_threadprivate_cached(loc, gtid, &var, size, &cache)
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}

/// \brief Register the ctor/copy-ctor/dtor helpers for the threadprivate
/// variable at \a VDAddr with the OpenMP runtime.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  auto OMPLoc = emitUpdateLocation(CGF, Loc);
  // The gtid result is unused here; the call ensures the runtime library is
  // initialized before registration.
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {OMPLoc,
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.VoidPtrTy),
                         Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}

/// \brief Emit (once per variable definition) the ctor/dtor helper functions
/// for the threadprivate definition of \a VD and either register them inline
/// (when \a CGF is provided) or return a standalone init function that does
/// the registration. Returns nullptr when nothing needs to be emitted.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Nothing to do when the native-TLS implementation is used.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit the helpers only once per variable definition.
  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
    ThreadPrivateWithDefinition.insert(VD);
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    auto Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD. Signature:
      //   void *.__kmpc_global_ctor_.(void *Dst)
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_ctor_.", FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, SourceLocation());
      auto ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
                                             CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the destination pointer and return it to the runtime.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD. Signature:
      //   void .__kmpc_global_dtor_.(void *Dst)
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_dtor_.", FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            SourceLocation());
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      auto ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto CopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                /*isVarArg=*/false)->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      // Pass a typed null pointer when no constructor is needed.
      auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      // Likewise for the destructor.
      auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function to emit into: synthesize a standalone global
      // init function that performs the registration and return it.
      auto InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, ".__omp_threadprivate_init_.",
          CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise register directly in the caller-provided function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
/// function. Here is the logic:
/// if (Cond) {
///   ThenGen();
/// } else {
///   ElseGen();
/// }
static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
                            const RegionCodeGenTy &ThenGen,
                            const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  auto ThenBlock = CGF.createBasicBlock("omp_if.then");
  auto ElseBlock = CGF.createBasicBlock("omp_if.else");
  auto ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// \brief Emit a call of the outlined 'parallel' region: __kmpc_fork_call when
/// the region actually runs in parallel, or a serialized call of the outlined
/// function on the current thread when the 'if' clause evaluates to false.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Value *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  auto *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    auto &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    auto &RT = CGF.CGM.getOpenMPRuntime();
    auto ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&gtid, &zero, CapturedStruct);
    auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddr =
        CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
                             /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond)
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  else {
    // No 'if' clause: the parallel path is unconditional.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();

  auto ThreadID = getThreadID(CGF, Loc);
  auto Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

/// \brief Return a module-internal global of type \a Ty named \a Name,
/// creating it (zero-initialized, common linkage) on first request.
llvm::Constant *
CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
                                             const llvm::Twine &Name) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  auto RuntimeName = Out.str();
  // insert() returns the existing entry if the name was already registered.
  auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first());
}

/// \brief Return the kmp_critical_name lock variable for the critical section
/// named \a CriticalName (".gomp_critical_user_<name>.var").
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  llvm::Twine Name(".gomp_critical_user_", CriticalName);
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
/// Emits an "enter" runtime call before the region body and an "exit" call
/// after it; when \p Conditional is set, the body is guarded on the enter
/// call's result being non-zero.
class CommonActionTy final : public PrePostActionTy {
  llvm::Value *EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::Value *ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
                 llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
                 bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // Closes the conditional region opened in Enter(); callers that construct
  // this action with Conditional=true are expected to call Done().
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

/// \brief Emit a 'critical' region guarded by __kmpc_critical /
/// __kmpc_end_critical, using __kmpc_critical_with_hint when a 'hint' clause
/// expression is supplied.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    // The hint value is passed as an extra trailing uintptr argument.
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
  }
  CommonActionTy Action(
      createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
                                 : OMPRTL__kmpc_critical),
      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// \brief Emit a 'master' region; the body runs only on the thread for which
/// __kmpc_master returns non-zero.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

/// \brief Emit a call to __kmpc_omp_taskyield for the 'taskyield' directive.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
  // Let the enclosing region emit its untied-task resume point, if any.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// \brief Emit a 'taskgroup' region guarded by __kmpc_taskgroup /
/// __kmpc_end_taskgroup.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr =
      CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  // Re-type the raw pointer to the variable's declared type and alignment.
  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// \brief Emit the helper function passed to __kmpc_copyprivate: it copies
/// each copyprivate variable from the executing thread's array of pointers to
/// the receiving thread's, using the provided assignment expressions.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
  auto &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *Fn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
      ".omp.copyprivate.copy_func", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// \brief Emit a 'single' region and, when 'copyprivate' variables are
/// present, broadcast the executing thread's values to the other threads via
/// __kmpc_copyprivate.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  auto &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    auto CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(
          CopyprivateList, I, CGF.getPointerSize());
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all
    // other threads in the corresponding parallel region.
    // NOTE(review): SrcExprs/DstExprs are forwarded here into the
    // DestExprs/SrcExprs parameters of emitCopyprivateCopyFunction -- confirm
    // the intended pairing against the callee's parameter order.
    auto *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
    auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

/// \brief Emit an 'ordered' region; with the 'threads' clause it is guarded
/// by __kmpc_ordered / __kmpc_end_ordered, otherwise emitted inline.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
                          createRuntimeFunction(OMPRTL__kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// \brief Emit an explicit or implicit barrier; inside cancellable regions
/// the cancellable variant is used and, when \a EmitChecks is set, the result
/// is checked to branch to the cancellation exit.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags =
        OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      auto *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
        auto *ContBB = CGF.createBasicBlock(".cancel.continue");
        auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        auto CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}

/// \brief Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    // No schedule clause: default to static.
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// \brief Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

/// \brief True if \a ScheduleKind/\a Chunked maps to the plain static
/// (non-chunked) runtime schedule.
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

/// \brief Same query for 'dist_schedule' on 'distribute' loops.
bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

/// \brief True if \a ScheduleKind requires the dynamic dispatch codegen path
/// (anything other than plain static).
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  auto Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// \brief Emit the __kmpc_dispatch_init_* call that starts a dynamically
/// scheduled worksharing loop.
void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPScheduleClauseKind ScheduleKind,
                                          unsigned IVSize, bool IVSigned,
                                          bool Ordered, llvm::Value *UB,
                                          llvm::Value *Chunk) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType
      Schedule =
          getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
  // Static schedules go through emitForStaticInit instead (unless ordered).
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  if (Chunk == nullptr)
    Chunk = CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(Schedule), // Schedule type
      CGF.Builder.getIntN(IVSize, 0), // Lower
      UB,                             // Upper
      CGF.Builder.getIntN(IVSize, 1), // Stride
      Chunk                           // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// \brief Shared helper for emitForStaticInit/emitDistributeStaticInit: emits
/// the __kmpc_for_static_init_* call for a statically scheduled loop.
static void emitForStaticInitCall(CodeGenFunction &CGF,
                                  SourceLocation Loc,
                                  llvm::Value * UpdateLocation,
                                  llvm::Value * ThreadId,
                                  llvm::Constant * ForStaticInitFunction,
                                  OpenMPSchedType Schedule,
                                  unsigned IVSize, bool IVSigned, bool Ordered,
                                  Address IL, Address LB, Address UB,
                                  Address ST, llvm::Value *Chunk) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(Schedule), // Schedule type
      IL.getPointer(),                // &isLastIter
      LB.getPointer(),                // &LB
      UB.getPointer(),                // &UB
      ST.getPointer(),                // &Stride
      CGF.Builder.getIntN(IVSize, 1), // Incr
      Chunk                           // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

/// \brief Emit the static-init call for a worksharing loop
/// ('for'/'sections').
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPScheduleClauseKind ScheduleKind,
                                        unsigned IVSize, bool IVSigned,
                                        bool Ordered, Address IL, Address LB,
                                        Address UB, Address ST,
                                        llvm::Value *Chunk) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(ScheduleKind, Chunk != nullptr,
                                                   Ordered);
  auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
  auto *ThreadId = getThreadID(CGF, Loc);
  auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
  emitForStaticInitCall(CGF, Loc, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk);
}

/// \brief Emit the static-init call for a 'distribute' loop.
void CGOpenMPRuntime::emitDistributeStaticInit(CodeGenFunction &CGF,
                                               SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind,
                                               unsigned IVSize, bool IVSigned,
                                               bool Ordered, Address IL, Address LB,
                                               Address UB, Address ST,
                                               llvm::Value *Chunk) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr);
  auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
  auto *ThreadId = getThreadID(CGF, Loc);
  auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
  emitForStaticInitCall(CGF, Loc, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk);
}

/// \brief Emit __kmpc_for_static_fini, signalling the end of a statically
/// scheduled loop.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
                      Args);
}

/// \brief Emit the dispatch-fini call at the end of one iteration of an
/// ordered dynamically scheduled loop.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

/// \brief Emit __kmpc_dispatch_next_* and convert its i32 result to bool
/// (true while more chunks remain to execute).
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
      CGF.getContext().BoolTy, Loc);
}

void
CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2494 llvm::Value *NumThreads, 2495 SourceLocation Loc) { 2496 if (!CGF.HaveInsertPoint()) 2497 return; 2498 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2499 llvm::Value *Args[] = { 2500 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2501 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 2502 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 2503 Args); 2504 } 2505 2506 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2507 OpenMPProcBindClauseKind ProcBind, 2508 SourceLocation Loc) { 2509 if (!CGF.HaveInsertPoint()) 2510 return; 2511 // Constants for proc bind value accepted by the runtime. 2512 enum ProcBindTy { 2513 ProcBindFalse = 0, 2514 ProcBindTrue, 2515 ProcBindMaster, 2516 ProcBindClose, 2517 ProcBindSpread, 2518 ProcBindIntel, 2519 ProcBindDefault 2520 } RuntimeProcBind; 2521 switch (ProcBind) { 2522 case OMPC_PROC_BIND_master: 2523 RuntimeProcBind = ProcBindMaster; 2524 break; 2525 case OMPC_PROC_BIND_close: 2526 RuntimeProcBind = ProcBindClose; 2527 break; 2528 case OMPC_PROC_BIND_spread: 2529 RuntimeProcBind = ProcBindSpread; 2530 break; 2531 case OMPC_PROC_BIND_unknown: 2532 llvm_unreachable("Unsupported proc_bind value."); 2533 } 2534 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2535 llvm::Value *Args[] = { 2536 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2537 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 2538 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 2539 } 2540 2541 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2542 SourceLocation Loc) { 2543 if (!CGF.HaveInsertPoint()) 2544 return; 2545 // Build call void __kmpc_flush(ident_t *loc) 2546 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 2547 emitUpdateLocation(CGF, Loc)); 2548 } 2549 2550 namespace { 2551 /// \brief 
Indexes of fields for type kmp_task_t. 2552 enum KmpTaskTFields { 2553 /// \brief List of shared variables. 2554 KmpTaskTShareds, 2555 /// \brief Task routine. 2556 KmpTaskTRoutine, 2557 /// \brief Partition id for the untied tasks. 2558 KmpTaskTPartId, 2559 /// \brief Function with call of destructors for private variables. 2560 KmpTaskTDestructors, 2561 /// (Taskloops only) Lower bound. 2562 KmpTaskTLowerBound, 2563 /// (Taskloops only) Upper bound. 2564 KmpTaskTUpperBound, 2565 /// (Taskloops only) Stride. 2566 KmpTaskTStride, 2567 /// (Taskloops only) Is last iteration flag. 2568 KmpTaskTLastIter, 2569 }; 2570 } // anonymous namespace 2571 2572 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2573 // FIXME: Add other entries type when they become supported. 2574 return OffloadEntriesTargetRegion.empty(); 2575 } 2576 2577 /// \brief Initialize target region entry. 2578 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2579 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2580 StringRef ParentName, unsigned LineNum, 2581 unsigned Order) { 2582 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2583 "only required for the device " 2584 "code generation."); 2585 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2586 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr); 2587 ++OffloadingEntriesNum; 2588 } 2589 2590 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2591 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2592 StringRef ParentName, unsigned LineNum, 2593 llvm::Constant *Addr, llvm::Constant *ID) { 2594 // If we are emitting code for a target, the entry is already initialized, 2595 // only has to be registered. 
2596 if (CGM.getLangOpts().OpenMPIsDevice) { 2597 assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 2598 "Entry must exist."); 2599 auto &Entry = 2600 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 2601 assert(Entry.isValid() && "Entry not initialized!"); 2602 Entry.setAddress(Addr); 2603 Entry.setID(ID); 2604 return; 2605 } else { 2606 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID); 2607 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 2608 } 2609 } 2610 2611 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 2612 unsigned DeviceID, unsigned FileID, StringRef ParentName, 2613 unsigned LineNum) const { 2614 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 2615 if (PerDevice == OffloadEntriesTargetRegion.end()) 2616 return false; 2617 auto PerFile = PerDevice->second.find(FileID); 2618 if (PerFile == PerDevice->second.end()) 2619 return false; 2620 auto PerParentName = PerFile->second.find(ParentName); 2621 if (PerParentName == PerFile->second.end()) 2622 return false; 2623 auto PerLine = PerParentName->second.find(LineNum); 2624 if (PerLine == PerParentName->second.end()) 2625 return false; 2626 // Fail if this entry is already registered. 2627 if (PerLine->second.getAddress() || PerLine->second.getID()) 2628 return false; 2629 return true; 2630 } 2631 2632 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 2633 const OffloadTargetRegionEntryInfoActTy &Action) { 2634 // Scan all target region entries and perform the provided action. 2635 for (auto &D : OffloadEntriesTargetRegion) 2636 for (auto &F : D.second) 2637 for (auto &P : F.second) 2638 for (auto &L : P.second) 2639 Action(D.first, F.first, P.first(), L.first, L.second); 2640 } 2641 2642 /// \brief Create a Ctor/Dtor-like function whose body is emitted through 2643 /// \a Codegen. 
This is used to emit the two functions that register and 2644 /// unregister the descriptor of the current compilation unit. 2645 static llvm::Function * 2646 createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name, 2647 const RegionCodeGenTy &Codegen) { 2648 auto &C = CGM.getContext(); 2649 FunctionArgList Args; 2650 ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(), 2651 /*Id=*/nullptr, C.VoidPtrTy); 2652 Args.push_back(&DummyPtr); 2653 2654 CodeGenFunction CGF(CGM); 2655 GlobalDecl(); 2656 auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2657 auto FTy = CGM.getTypes().GetFunctionType(FI); 2658 auto *Fn = 2659 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation()); 2660 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation()); 2661 Codegen(CGF); 2662 CGF.FinishFunction(); 2663 return Fn; 2664 } 2665 2666 llvm::Function * 2667 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { 2668 2669 // If we don't have entries or if we are emitting code for the device, we 2670 // don't need to do anything. 2671 if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) 2672 return nullptr; 2673 2674 auto &M = CGM.getModule(); 2675 auto &C = CGM.getContext(); 2676 2677 // Get list of devices we care about 2678 auto &Devices = CGM.getLangOpts().OMPTargetTriples; 2679 2680 // We should be creating an offloading descriptor only if there are devices 2681 // specified. 2682 assert(!Devices.empty() && "No OpenMP offloading devices??"); 2683 2684 // Create the external variables that will point to the begin and end of the 2685 // host entries section. These will be defined by the linker. 
2686 auto *OffloadEntryTy = 2687 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); 2688 llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable( 2689 M, OffloadEntryTy, /*isConstant=*/true, 2690 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 2691 ".omp_offloading.entries_begin"); 2692 llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable( 2693 M, OffloadEntryTy, /*isConstant=*/true, 2694 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 2695 ".omp_offloading.entries_end"); 2696 2697 // Create all device images 2698 llvm::SmallVector<llvm::Constant *, 4> DeviceImagesEntires; 2699 auto *DeviceImageTy = cast<llvm::StructType>( 2700 CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); 2701 2702 for (unsigned i = 0; i < Devices.size(); ++i) { 2703 StringRef T = Devices[i].getTriple(); 2704 auto *ImgBegin = new llvm::GlobalVariable( 2705 M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, 2706 /*Initializer=*/nullptr, 2707 Twine(".omp_offloading.img_start.") + Twine(T)); 2708 auto *ImgEnd = new llvm::GlobalVariable( 2709 M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, 2710 /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T)); 2711 2712 llvm::Constant *Dev = 2713 llvm::ConstantStruct::get(DeviceImageTy, ImgBegin, ImgEnd, 2714 HostEntriesBegin, HostEntriesEnd, nullptr); 2715 DeviceImagesEntires.push_back(Dev); 2716 } 2717 2718 // Create device images global array. 
2719 llvm::ArrayType *DeviceImagesInitTy = 2720 llvm::ArrayType::get(DeviceImageTy, DeviceImagesEntires.size()); 2721 llvm::Constant *DeviceImagesInit = 2722 llvm::ConstantArray::get(DeviceImagesInitTy, DeviceImagesEntires); 2723 2724 llvm::GlobalVariable *DeviceImages = new llvm::GlobalVariable( 2725 M, DeviceImagesInitTy, /*isConstant=*/true, 2726 llvm::GlobalValue::InternalLinkage, DeviceImagesInit, 2727 ".omp_offloading.device_images"); 2728 DeviceImages->setUnnamedAddr(true); 2729 2730 // This is a Zero array to be used in the creation of the constant expressions 2731 llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), 2732 llvm::Constant::getNullValue(CGM.Int32Ty)}; 2733 2734 // Create the target region descriptor. 2735 auto *BinaryDescriptorTy = cast<llvm::StructType>( 2736 CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy())); 2737 llvm::Constant *TargetRegionsDescriptorInit = llvm::ConstantStruct::get( 2738 BinaryDescriptorTy, llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()), 2739 llvm::ConstantExpr::getGetElementPtr(DeviceImagesInitTy, DeviceImages, 2740 Index), 2741 HostEntriesBegin, HostEntriesEnd, nullptr); 2742 2743 auto *Desc = new llvm::GlobalVariable( 2744 M, BinaryDescriptorTy, /*isConstant=*/true, 2745 llvm::GlobalValue::InternalLinkage, TargetRegionsDescriptorInit, 2746 ".omp_offloading.descriptor"); 2747 2748 // Emit code to register or unregister the descriptor at execution 2749 // startup or closing, respectively. 2750 2751 // Create a variable to drive the registration and unregistration of the 2752 // descriptor, so we can reuse the logic that emits Ctors and Dtors. 
2753 auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var"); 2754 ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(), 2755 IdentInfo, C.CharTy); 2756 2757 auto *UnRegFn = createOffloadingBinaryDescriptorFunction( 2758 CGM, ".omp_offloading.descriptor_unreg", 2759 [&](CodeGenFunction &CGF, PrePostActionTy &) { 2760 CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib), 2761 Desc); 2762 }); 2763 auto *RegFn = createOffloadingBinaryDescriptorFunction( 2764 CGM, ".omp_offloading.descriptor_reg", 2765 [&](CodeGenFunction &CGF, PrePostActionTy &) { 2766 CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib), 2767 Desc); 2768 CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); 2769 }); 2770 return RegFn; 2771 } 2772 2773 void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID, 2774 llvm::Constant *Addr, uint64_t Size) { 2775 StringRef Name = Addr->getName(); 2776 auto *TgtOffloadEntryType = cast<llvm::StructType>( 2777 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy())); 2778 llvm::LLVMContext &C = CGM.getModule().getContext(); 2779 llvm::Module &M = CGM.getModule(); 2780 2781 // Make sure the address has the right type. 2782 llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy); 2783 2784 // Create constant string with the name. 2785 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 2786 2787 llvm::GlobalVariable *Str = 2788 new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true, 2789 llvm::GlobalValue::InternalLinkage, StrPtrInit, 2790 ".omp_offloading.entry_name"); 2791 Str->setUnnamedAddr(true); 2792 llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy); 2793 2794 // Create the entry struct. 
2795 llvm::Constant *EntryInit = llvm::ConstantStruct::get( 2796 TgtOffloadEntryType, AddrPtr, StrPtr, 2797 llvm::ConstantInt::get(CGM.SizeTy, Size), nullptr); 2798 llvm::GlobalVariable *Entry = new llvm::GlobalVariable( 2799 M, TgtOffloadEntryType, true, llvm::GlobalValue::ExternalLinkage, 2800 EntryInit, ".omp_offloading.entry"); 2801 2802 // The entry has to be created in the section the linker expects it to be. 2803 Entry->setSection(".omp_offloading.entries"); 2804 // We can't have any padding between symbols, so we need to have 1-byte 2805 // alignment. 2806 Entry->setAlignment(1); 2807 } 2808 2809 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 2810 // Emit the offloading entries and metadata so that the device codegen side 2811 // can 2812 // easily figure out what to emit. The produced metadata looks like this: 2813 // 2814 // !omp_offload.info = !{!1, ...} 2815 // 2816 // Right now we only generate metadata for function that contain target 2817 // regions. 2818 2819 // If we do not have entries, we dont need to do anything. 2820 if (OffloadEntriesInfoManager.empty()) 2821 return; 2822 2823 llvm::Module &M = CGM.getModule(); 2824 llvm::LLVMContext &C = M.getContext(); 2825 SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> 2826 OrderedEntries(OffloadEntriesInfoManager.size()); 2827 2828 // Create the offloading info metadata node. 2829 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 2830 2831 // Auxiliar methods to create metadata values and strings. 
2832 auto getMDInt = [&](unsigned v) { 2833 return llvm::ConstantAsMetadata::get( 2834 llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v)); 2835 }; 2836 2837 auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); }; 2838 2839 // Create function that emits metadata for each target region entry; 2840 auto &&TargetRegionMetadataEmitter = [&]( 2841 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line, 2842 OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 2843 llvm::SmallVector<llvm::Metadata *, 32> Ops; 2844 // Generate metadata for target regions. Each entry of this metadata 2845 // contains: 2846 // - Entry 0 -> Kind of this type of metadata (0). 2847 // - Entry 1 -> Device ID of the file where the entry was identified. 2848 // - Entry 2 -> File ID of the file where the entry was identified. 2849 // - Entry 3 -> Mangled name of the function where the entry was identified. 2850 // - Entry 4 -> Line in the file where the entry was identified. 2851 // - Entry 5 -> Order the entry was created. 2852 // The first element of the metadata node is the kind. 2853 Ops.push_back(getMDInt(E.getKind())); 2854 Ops.push_back(getMDInt(DeviceID)); 2855 Ops.push_back(getMDInt(FileID)); 2856 Ops.push_back(getMDString(ParentName)); 2857 Ops.push_back(getMDInt(Line)); 2858 Ops.push_back(getMDInt(E.getOrder())); 2859 2860 // Save this entry in the right position of the ordered entries array. 2861 OrderedEntries[E.getOrder()] = &E; 2862 2863 // Add metadata to the named metadata node. 
2864 MD->addOperand(llvm::MDNode::get(C, Ops)); 2865 }; 2866 2867 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 2868 TargetRegionMetadataEmitter); 2869 2870 for (auto *E : OrderedEntries) { 2871 assert(E && "All ordered entries must exist!"); 2872 if (auto *CE = 2873 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 2874 E)) { 2875 assert(CE->getID() && CE->getAddress() && 2876 "Entry ID and Addr are invalid!"); 2877 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0); 2878 } else 2879 llvm_unreachable("Unsupported entry kind."); 2880 } 2881 } 2882 2883 /// \brief Loads all the offload entries information from the host IR 2884 /// metadata. 2885 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 2886 // If we are in target mode, load the metadata from the host IR. This code has 2887 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 2888 2889 if (!CGM.getLangOpts().OpenMPIsDevice) 2890 return; 2891 2892 if (CGM.getLangOpts().OMPHostIRFile.empty()) 2893 return; 2894 2895 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 2896 if (Buf.getError()) 2897 return; 2898 2899 llvm::LLVMContext C; 2900 auto ME = llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C); 2901 2902 if (ME.getError()) 2903 return; 2904 2905 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 2906 if (!MD) 2907 return; 2908 2909 for (auto I : MD->operands()) { 2910 llvm::MDNode *MN = cast<llvm::MDNode>(I); 2911 2912 auto getMDInt = [&](unsigned Idx) { 2913 llvm::ConstantAsMetadata *V = 2914 cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 2915 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 2916 }; 2917 2918 auto getMDString = [&](unsigned Idx) { 2919 llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx)); 2920 return V->getString(); 2921 }; 2922 2923 switch (getMDInt(0)) { 2924 default: 2925 llvm_unreachable("Unexpected metadata!"); 2926 break; 2927 case 
OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 2928 OFFLOAD_ENTRY_INFO_TARGET_REGION: 2929 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 2930 /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2), 2931 /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4), 2932 /*Order=*/getMDInt(5)); 2933 break; 2934 } 2935 } 2936 } 2937 2938 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 2939 if (!KmpRoutineEntryPtrTy) { 2940 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 2941 auto &C = CGM.getContext(); 2942 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 2943 FunctionProtoType::ExtProtoInfo EPI; 2944 KmpRoutineEntryPtrQTy = C.getPointerType( 2945 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 2946 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 2947 } 2948 } 2949 2950 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 2951 QualType FieldTy) { 2952 auto *Field = FieldDecl::Create( 2953 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 2954 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 2955 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 2956 Field->setAccess(AS_public); 2957 DC->addDecl(Field); 2958 return Field; 2959 } 2960 2961 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 2962 2963 // Make sure the type of the entry is already created. This is the type we 2964 // have to create: 2965 // struct __tgt_offload_entry{ 2966 // void *addr; // Pointer to the offload entry info. 2967 // // (function or global) 2968 // char *name; // Name of the function or global. 2969 // size_t size; // Size of the entry info (0 if it a function). 
2970 // }; 2971 if (TgtOffloadEntryQTy.isNull()) { 2972 ASTContext &C = CGM.getContext(); 2973 auto *RD = C.buildImplicitRecord("__tgt_offload_entry"); 2974 RD->startDefinition(); 2975 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 2976 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 2977 addFieldToRecordDecl(C, RD, C.getSizeType()); 2978 RD->completeDefinition(); 2979 TgtOffloadEntryQTy = C.getRecordType(RD); 2980 } 2981 return TgtOffloadEntryQTy; 2982 } 2983 2984 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { 2985 // These are the types we need to build: 2986 // struct __tgt_device_image{ 2987 // void *ImageStart; // Pointer to the target code start. 2988 // void *ImageEnd; // Pointer to the target code end. 2989 // // We also add the host entries to the device image, as it may be useful 2990 // // for the target runtime to have access to that information. 2991 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all 2992 // // the entries. 2993 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 2994 // // entries (non inclusive). 2995 // }; 2996 if (TgtDeviceImageQTy.isNull()) { 2997 ASTContext &C = CGM.getContext(); 2998 auto *RD = C.buildImplicitRecord("__tgt_device_image"); 2999 RD->startDefinition(); 3000 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3001 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3002 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3003 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3004 RD->completeDefinition(); 3005 TgtDeviceImageQTy = C.getRecordType(RD); 3006 } 3007 return TgtDeviceImageQTy; 3008 } 3009 3010 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { 3011 // struct __tgt_bin_desc{ 3012 // int32_t NumDevices; // Number of devices supported. 3013 // __tgt_device_image *DeviceImages; // Arrays of device images 3014 // // (one per device). 3015 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the 3016 // // entries. 
3017 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 3018 // // entries (non inclusive). 3019 // }; 3020 if (TgtBinaryDescriptorQTy.isNull()) { 3021 ASTContext &C = CGM.getContext(); 3022 auto *RD = C.buildImplicitRecord("__tgt_bin_desc"); 3023 RD->startDefinition(); 3024 addFieldToRecordDecl( 3025 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3026 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); 3027 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3028 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 3029 RD->completeDefinition(); 3030 TgtBinaryDescriptorQTy = C.getRecordType(RD); 3031 } 3032 return TgtBinaryDescriptorQTy; 3033 } 3034 3035 namespace { 3036 struct PrivateHelpersTy { 3037 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 3038 const VarDecl *PrivateElemInit) 3039 : Original(Original), PrivateCopy(PrivateCopy), 3040 PrivateElemInit(PrivateElemInit) {} 3041 const VarDecl *Original; 3042 const VarDecl *PrivateCopy; 3043 const VarDecl *PrivateElemInit; 3044 }; 3045 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3046 } // anonymous namespace 3047 3048 static RecordDecl * 3049 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3050 if (!Privates.empty()) { 3051 auto &C = CGM.getContext(); 3052 // Build struct .kmp_privates_t. 
{ 3053 // /* private vars */ 3054 // }; 3055 auto *RD = C.buildImplicitRecord(".kmp_privates.t"); 3056 RD->startDefinition(); 3057 for (auto &&Pair : Privates) { 3058 auto *VD = Pair.second.Original; 3059 auto Type = VD->getType(); 3060 Type = Type.getNonReferenceType(); 3061 auto *FD = addFieldToRecordDecl(C, RD, Type); 3062 if (VD->hasAttrs()) { 3063 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3064 E(VD->getAttrs().end()); 3065 I != E; ++I) 3066 FD->addAttr(*I); 3067 } 3068 } 3069 RD->completeDefinition(); 3070 return RD; 3071 } 3072 return nullptr; 3073 } 3074 3075 static RecordDecl * 3076 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3077 QualType KmpInt32Ty, 3078 QualType KmpRoutineEntryPointerQTy) { 3079 auto &C = CGM.getContext(); 3080 // Build struct kmp_task_t { 3081 // void * shareds; 3082 // kmp_routine_entry_t routine; 3083 // kmp_int32 part_id; 3084 // kmp_routine_entry_t destructors; 3085 // For taskloops additional fields: 3086 // kmp_uint64 lb; 3087 // kmp_uint64 ub; 3088 // kmp_int64 st; 3089 // kmp_int32 liter; 3090 // }; 3091 auto *RD = C.buildImplicitRecord("kmp_task_t"); 3092 RD->startDefinition(); 3093 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3094 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3095 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3096 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3097 if (isOpenMPTaskLoopDirective(Kind)) { 3098 QualType KmpUInt64Ty = 3099 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3100 QualType KmpInt64Ty = 3101 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3102 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3103 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3104 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3105 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3106 } 3107 RD->completeDefinition(); 3108 return RD; 3109 } 3110 3111 static RecordDecl * 3112 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, 
// (continuation of createKmpTaskTWithPrivatesRecordDecl: wraps kmp_task_t
// together with an optional record of privatized variables)
    QualType KmpTaskTQTy,
    ArrayRef<PrivateDataTy> Privates) {
  auto &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;   // only present if Privates is non-empty
  //       };
  auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  }
  RD->completeDefinition();
  return RD;
}

/// \brief Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
/// For taskloops:
/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
/// tt->shareds);
/// return 0;
/// }
/// \endcode
static llvm::Value *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Value *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  auto &C = CGM.getContext();
  // Build the (gtid, task_type*) parameter list expected by the runtime's
  // task entry point.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
                                /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict());
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  auto *TaskEntry =
      llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_entry.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  auto *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // tt dereferenced: base lvalue for the whole kmp_task_t_with_privates.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // First field is the embedded kmp_task_t (task_data).
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  // part_id is passed by address (kmp_int32 *) to the outlined task function.
  auto *PartidParam = PartIdLVal.getPointer();

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // Pass &tt->privates if the record has a privates field, else a null void*.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
// (continuation of emitDestructorsFunction's TaskTypeArg parameter)
                                /*DC=*/nullptr, Loc,
                                /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict());
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  FunctionType::ExtInfo Info;
  auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_destructor.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
                                    DestructorFnInfo);
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args);

  // Walk the fields of the privates record and push a destroy cleanup for
  // every field whose type requires destruction.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Second field of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (auto DtorKind = Field->getType().isDestructedType()) {
      auto FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// \brief Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,..., <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               ArrayRef<const Expr *> PrivateVars,
                               ArrayRef<const Expr *> FirstprivateVars,
                               QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  // First parameter: pointer to the privates record itself.
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict());
  Args.push_back(&TaskPrivatesArg);
  // Remember, for each privatized variable, which output parameter slot it
  // maps to (parameter 0 is the privates record pointer, hence Counter = 1).
  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (auto *E : PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc,
        /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
                            .withConst()
                            .withRestrict()));
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (auto *E : FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc,
        /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
                            .withConst()
                            .withRestrict()));
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
      ".omp_task_privates_map.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
// (continuation of emitTaskPrivateMappingFunction)
  TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  // Privates[Counter] and the privates record fields are in the same
  // (alignment-sorted) order; PrivateVarsPos recovers the output parameter
  // that corresponds to each field.
  for (auto *Field : PrivatesQTyRD->fields()) {
    auto FieldLVal = CGF.EmitLValueForField(Base, Field);
    auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    auto RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// qsort-style comparator for PrivateDataTy: orders by *descending* alignment
/// (Pair.first), so the most strictly aligned privates come first in the
/// privates record.
static int array_pod_sort_comparator(const PrivateDataTy *P1,
                                     const PrivateDataTy *P2) {
  return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
}

/// \brief Allocate and initialize a kmp_task_t object for the given
/// directive: builds the kmp_task_t_with_privates record, the proxy task
/// entry, the privates mapping and destructor functions as needed, calls
/// __kmpc_omp_task_alloc, and fills in shareds and initial values for the
/// private copies. Returns the handles needed by the task/taskloop callers.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Value *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  auto &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  auto I = Data.PrivateCopies.begin();
  for (auto *E : Data.PrivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (auto *E : Data.FirstprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
    ++I;
    ++IElemInitRef;
  }
  llvm::array_pod_sort(Privates.begin(), Privates.end(),
                       array_pod_sort_comparator);
  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (KmpTaskTQTy.isNull()) {
    KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
        CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
  }
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  auto *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
  auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter type is taken from the outlined task
  // function's 4th argument.
  auto *TaskPrivatesMapTy =
      std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
                3)
          ->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(CGM, Loc, Data.PrivateVars,
                                                     Data.FirstprivateVars,
                                                     FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  auto *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  const unsigned TiedFlag = 0x1;
  const unsigned FinalFlag = 0x2;
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  // Final may be a compile-time constant (getInt) or a runtime value
  // (getPointer); emit a select only in the latter case.
  auto *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  auto *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
  }
  // Emit initial values for private copies (if any).
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    auto PrivatesBase = CGF.EmitLValueForField(Base, *FI);
    FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
    LValue SharedsBase;
    if (!Data.FirstprivateVars.empty()) {
      SharedsBase = CGF.MakeAddrLValue(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
          SharedsTy);
    }
    CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
        cast<CapturedStmt>(*D.getAssociatedStmt()));
    for (auto &&Pair : Privates) {
      auto *VD = Pair.second.PrivateCopy;
      auto *Init = VD->getAnyInitializer();
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (Init) {
        // PrivateElemInit is set only for firstprivates: initialize the
        // private copy from the original (shared) variable.
        if (auto *Elem = Pair.second.PrivateElemInit) {
          auto *OriginalVD = Pair.second.Original;
          auto *SharedField = CapturesInfo.lookup(OriginalVD);
          auto SharedRefLValue =
              CGF.EmitLValueForField(SharedsBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), AlignmentSource::Decl);
          QualType Type = OriginalVD->getType();
          if (Type->isArrayType()) {
            // Initialize firstprivate array.
            if (!isa<CXXConstructExpr>(Init) ||
                CGF.isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
                                      SharedRefLValue.getAddress(), Type);
            } else {
              // Initialize firstprivate array using element-by-element
              // initialization.
              CGF.EmitOMPAggregateAssign(
                  PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
                  Type, [&CGF, Elem, Init, &CapturesInfo](
                            Address DestElement, Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    CodeGenFunction::OMPPrivateScope InitScope(CGF);
                    InitScope.addPrivate(Elem, [SrcElement]() -> Address {
                      return SrcElement;
                    });
                    (void)InitScope.Privatize();
                    // Emit initialization for single element.
                    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                        CGF, &CapturesInfo);
                    CGF.EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer=*/false);
                  });
            }
          } else {
            CodeGenFunction::OMPPrivateScope InitScope(CGF);
            InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
              return SharedRefLValue.getAddress();
            });
            (void)InitScope.Privatize();
            CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
            CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                               /*capturedByInit=*/false);
          }
        } else {
          // Plain private: run the copy's default initializer.
          CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
        }
      }
      NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
      ++FI;
    }
  }
  // Provide pointer to function with destructors for privates.
  llvm::Value *DestructorFn =
      NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
                                             KmpTaskTWithPrivatesPtrQTy,
                                             KmpTaskTWithPrivatesQTy)
                   : llvm::ConstantPointerNull::get(
                         cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
  LValue Destructor = CGF.EmitLValueForField(
      TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
  CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                            DestructorFn, KmpRoutineEntryPtrTy),
                        Destructor);
  TaskResultTy Result;
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

/// Emit code for a '#pragma omp task' construct: allocate/initialize the task
/// via emitTaskInit, build the dependence list (if any), then invoke the
/// appropriate runtime entry points, honoring the 'if' clause.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Value *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Value *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  auto &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    // (continuation of emitTaskCall: build the kmp_depend_info array)
    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Build type kmp_depend_info { intptr base_addr; size_t len; flags; }
    // (if not built yet).
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned i = 0; i < NumDependencies; ++i) {
      const Expr *E = Data.Dependences[i].second;
      auto Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // For an array section the length is (one-past-upper - lower) bytes.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else
        Size = CGF.getTypeSize(Ty);
      auto Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      auto BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      auto LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[i].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      auto FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
        CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  auto *ThreadID = getThreadID(CGF, Loc);
  auto *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'then' branch of the 'if' clause (or unconditional path): enqueue the
  // task via __kmpc_omp_task[_with_deps].
  auto &&ThenCodeGen = [this, Loc, &Data, TDBase, KmpTaskTQTyRD,
                        NumDependencies, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'else' branch: undeferred task — wait on dependences, then run the task
  // entry inline between begin_if0/complete_if0 runtime calls.
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF,
                                                           PrePostActionTy &) {
    auto &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy](
        CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond)
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

/// Emit code for a '#pragma omp taskloop' construct: initialize the task via
/// emitTaskInit, fill in the loop bounds/stride fields, then call
/// __kmpc_taskloop.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Value *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
// (continuation of emitTaskLoopCall)
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);

  // Store the initial values of the loop's lower bound, upper bound and
  // stride into the corresponding kmp_task_t fields.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Scheduling modifier encoding for the 'sched' argument.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(),
      UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
      llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}

/// \brief Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  auto RHSBegin = RHSAddr.getPointer();
  auto LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  // (continuation of EmitOMPAggregateReduction: emit the element loop)
  auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current elements so RedOpGen
  // operates on a single pair of elements.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner. If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          // User-defined reduction: bind the opaque callee to the UDR
          // combiner function and emit the call.
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emits the function that combines a thread's private reduction copies into
/// the master's copies:
///   void reduction_func(void *LHSArg, void *RHSArg);
/// where both arguments are arrays of pointers to the reduction variables.
static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
                                          llvm::Type *ArgsType,
                                          ArrayRef<const Expr *> Privates,
                                          ArrayRef<const Expr *> LHSExprs,
                                          ArrayRef<const Expr *> RHSExprs,
                                          ArrayRef<const Expr *> ReductionOps) {
  auto &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  auto &CGFI =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3947 auto *Fn = llvm::Function::Create( 3948 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 3949 ".omp.reduction.reduction_func", &CGM.getModule()); 3950 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); 3951 CodeGenFunction CGF(CGM); 3952 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 3953 3954 // Dst = (void*[n])(LHSArg); 3955 // Src = (void*[n])(RHSArg); 3956 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3957 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 3958 ArgsType), CGF.getPointerAlign()); 3959 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3960 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 3961 ArgsType), CGF.getPointerAlign()); 3962 3963 // ... 3964 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 3965 // ... 3966 CodeGenFunction::OMPPrivateScope Scope(CGF); 3967 auto IPriv = Privates.begin(); 3968 unsigned Idx = 0; 3969 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 3970 auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 3971 Scope.addPrivate(RHSVar, [&]() -> Address { 3972 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 3973 }); 3974 auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 3975 Scope.addPrivate(LHSVar, [&]() -> Address { 3976 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 3977 }); 3978 QualType PrivTy = (*IPriv)->getType(); 3979 if (PrivTy->isVariablyModifiedType()) { 3980 // Get array size and emit VLA type. 
3981 ++Idx; 3982 Address Elem = 3983 CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize()); 3984 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 3985 auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy); 3986 auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 3987 CodeGenFunction::OpaqueValueMapping OpaqueMap( 3988 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 3989 CGF.EmitVariablyModifiedType(PrivTy); 3990 } 3991 } 3992 Scope.Privatize(); 3993 IPriv = Privates.begin(); 3994 auto ILHS = LHSExprs.begin(); 3995 auto IRHS = RHSExprs.begin(); 3996 for (auto *E : ReductionOps) { 3997 if ((*IPriv)->getType()->isArrayType()) { 3998 // Emit reduction for array section. 3999 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 4000 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 4001 EmitOMPAggregateReduction( 4002 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 4003 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 4004 emitReductionCombiner(CGF, E); 4005 }); 4006 } else 4007 // Emit reduction for array subscript or single variable. 4008 emitReductionCombiner(CGF, E); 4009 ++IPriv; 4010 ++ILHS; 4011 ++IRHS; 4012 } 4013 Scope.ForceCleanup(); 4014 CGF.FinishFunction(); 4015 return Fn; 4016 } 4017 4018 static void emitSingleReductionCombiner(CodeGenFunction &CGF, 4019 const Expr *ReductionOp, 4020 const Expr *PrivateRef, 4021 const DeclRefExpr *LHS, 4022 const DeclRefExpr *RHS) { 4023 if (PrivateRef->getType()->isArrayType()) { 4024 // Emit reduction for array section. 4025 auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 4026 auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 4027 EmitOMPAggregateReduction( 4028 CGF, PrivateRef->getType(), LHSVar, RHSVar, 4029 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 4030 emitReductionCombiner(CGF, ReductionOp); 4031 }); 4032 } else 4033 // Emit reduction for array subscript or single variable. 
    emitReductionCombiner(CGF, ReductionOp);
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    bool WithNowait, bool SimpleReduction) {
  if (!CGF.HaveInsertPoint())
    return;
  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  auto &C = CGM.getContext();

  // Simple reductions (e.g. inside 'simd') need no runtime coordination:
  // just apply each combiner directly.
  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (auto *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (auto *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  // Fill the list with the address of each thread-private copy; for a VLA
  // private, the following slot additionally carries its runtime size
  // (matching the unpacking performed in emitReductionFunction).
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem =
      CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
                                             CGF.getPointerSize());
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
              CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .first,
          CGF.SizeTy, /*isSigned=*/false);
      // The size is smuggled through the void* slot via inttoptr.
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  auto *ReductionFn = emitReductionFunction(
      CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  auto *Lock = getCriticalRegionLock(".reduction");

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  auto *ThreadId = getThreadID(CGF, Loc);
  auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  auto *RL =
    CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
                                                    CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  auto Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1: the runtime granted this thread the non-atomic
  // (tree-reduce) path; apply the combiners directly, then release with
  // __kmpc_end_reduce{_nowait}.
  // ...
  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  // ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (auto *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  // The CommonActionTy exit action emits the __kmpc_end_reduce{_nowait} call
  // after the combiners, including on cleanup paths.
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2: the runtime requested the atomic fallback path.
  // ...
  // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  // ...
  // break;
  auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (auto *E : ReductionOps) {
      // Decompose the reduction op, which sema built as 'x = <expr>', into
      // the atomic target (XExpr), the combining operand (EExpr), the full
      // update expression (UpExpr) and the operator (BO). If the op does not
      // match that shape, fall back to a critical section below.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // NOTE: the inner 'BO' deliberately shadows the outer operator-kind
      // variable; the outer one is assigned from BORHS further down.
      if (auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      auto *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD, IPriv,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) {
                // Fallback for the compare-exchange loop: re-evaluate the
                // update expression against a temporary holding the value
                // loaded from 'x'.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() -> Address {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          auto &RT = CGF.CGM.getOpenMPRuntime();
          RT.emitCriticalRegion(
              CGF, ".atomic_reduction",
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else
          CritRedGen(CGF, nullptr, nullptr, nullptr);
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else
    AtomicRCG(CGF);

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Ignore return result until untied tasks are supported.
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
  // Untied tasks may be re-scheduled after a taskwait; give the enclosing
  // region a chance to emit its untied-task switch point.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // Emit the region body in place (no outlining), with an RAII guard that
  // installs the inlined-region CapturedStmtInfo for its duration.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
// Cancellation-kind constants passed to __kmpc_cancel and
// __kmpc_cancellationpoint; values must match the OpenMP runtime's
// kmp_int32 cncl_kind encoding.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Map a cancellable OpenMP construct kind to the runtime's cancel-kind code.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    // Only the four cancellable constructs are valid here.
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The call is emitted only when the enclosing region actually contains a
    // 'cancel' directive; otherwise the cancellation point is a no-op.
    if (OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      auto *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //  __kmpc_cancel_barrier();
      //   exit from construct;
      // }
      auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
      auto *ContBB = CGF.createBasicBlock(".cancel.continue");
      auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // __kmpc_cancel_barrier();
      emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      auto CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      auto &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      auto *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //  __kmpc_cancel_barrier();
      //   exit from construct;
      // }
      auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
      auto *ContBB = CGF.createBasicBlock(".cancel.continue");
      auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // __kmpc_cancel_barrier();
      RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      auto CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    // Honor the 'if' clause: only call into the runtime when the condition
    // holds; with no condition, emit the cancel unconditionally.
    if (IfCond)
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

/// \brief Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {

  auto &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");
  assert(Loc.isFileID() && "Source location is expected to refer to a file.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  // Use the filesystem's device/inode pair so the ID survives different
  // spellings of the same path.
  llvm::sys::fs::UniqueID ID;
  if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    llvm_unreachable("Source file with target region no longer exists!");

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");

  // Thin wrapper kept so targets can override outlining while sharing the
  // common helper below.
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region.
  // The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());

  // Outline the captured target-region body into a function with the name
  // computed above.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the
  // current target region, so it only has to be unique and not necessarily
  // point to anything. It could be the pointer to the outlined function that
  // implements the target region, but we aren't using that so that the
  // compiler doesn't need to keep that, and could therefore inline the host
  // function if proven worthwhile during optimization. On the other hand, if
  // emitting code for the device, the ID has to be the function address so
  // that it can be retrieved from the offloading entry and launched by the
  // runtime library. We also mark the outlined function to have external
  // linkage in case we are emitting code for the device, because these
  // functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
  } else
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
}

/// discard all CompoundStmts intervening between two constructs
static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
  // NOTE: only the first statement of each compound is followed; a teams
  // directive is expected to be the sole statement at each level here.
  while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
    Body = CS->body_front();

  return Body;
}

/// \brief Emit the num_teams clause of an enclosed teams directive at the
/// target region scope. If there is no teams directive associated with the
/// target directive, or if there is no num_teams clause associated with the
/// enclosed teams directive, return nullptr.
static llvm::Value *
emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
                                     CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D) {

  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
                                              "teams directive expected to be "
                                              "emitted only for the host!");

  // FIXME: For the moment we do not support combined directives with target
  // and teams, so we do not expect to get any num_teams clause in the provided
  // directive. Once we support that, this assertion can be replaced by the
  // actual emission of the clause expression.
  assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr &&
         "Not expecting clause in directive.");

  // If the current target region has a teams region enclosed, we need to get
  // the number of teams to pass to the runtime function call. This is done
  // by generating the expression in an inlined region. This is required
  // because the expression is captured in the enclosing target environment
  // when the teams directive is not combined with target.

  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());

  // FIXME: Accommodate other combined directives with teams when they become
  // available.
  if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
          ignoreCompoundStmts(CS.getCapturedStmt()))) {
    if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
      // Evaluate the clause expression against the target region's captures.
      CGOpenMPInnerExprInfo CGInfo(CGF, CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
      return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty,
                                       /*IsSigned=*/true);
    }

    // If we have an enclosed teams directive but no num_teams clause we use
    // the default value 0.
    return CGF.Builder.getInt32(0);
  }

  // No teams associated with the directive.
  return nullptr;
}

/// \brief Emit the thread_limit clause of an enclosed teams directive at the
/// target region scope. If there is no teams directive associated with the
/// target directive, or if there is no thread_limit clause associated with the
/// enclosed teams directive, return nullptr.
static llvm::Value *
emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
                                        CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D) {

  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
                                              "teams directive expected to be "
                                              "emitted only for the host!");

  // FIXME: For the moment we do not support combined directives with target
  // and teams, so we do not expect to get any thread_limit clause in the
  // provided directive. Once we support that, this assertion can be replaced
  // by the actual emission of the clause expression.
  assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr &&
         "Not expecting clause in directive.");

  // If the current target region has a teams region enclosed, we need to get
  // the thread limit to pass to the runtime function call. This is done
  // by generating the expression in an inlined region. This is required
  // because the expression is captured in the enclosing target environment
  // when the teams directive is not combined with target.

  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());

  // FIXME: Accommodate other combined directives with teams when they become
  // available.
  if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
          ignoreCompoundStmts(CS.getCapturedStmt()))) {
    if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
      // Evaluate the clause expression against the target region's captures.
      CGOpenMPInnerExprInfo CGInfo(CGF, CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
      return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
                                       /*IsSigned=*/true);
    }

    // If we have an enclosed teams directive but no thread_limit clause we use
    // the default value 0.
    return CGF.Builder.getInt32(0);
  }

  // No teams associated with the directive.
  return nullptr;
}

namespace {
// \brief Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// \brief Values for bit flags used to specify the mapping type for
  /// offloading. These must stay in sync with the map-type encoding expected
  /// by the offloading runtime's __tgt_* entry points.
  enum OpenMPOffloadMappingFlags {
    /// \brief Only allocate memory on the device,
    OMP_MAP_ALLOC = 0x00,
    /// \brief Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// \brief Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// \brief Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// \brief Decrement the reference count associated with the element
    /// without executing any other action.
    OMP_MAP_RELEASE = 0x08,
    /// \brief Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x10,
    /// \brief The element passed to the device is a pointer.
    OMP_MAP_PTR = 0x20,
    /// \brief Signal the element as extra, i.e. is not argument to the target
    /// region kernel.
    OMP_MAP_EXTRA = 0x40,
    /// \brief Pass the element to the device by value.
    OMP_MAP_BYCOPY = 0x80,
  };

  typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy;
  typedef SmallVector<unsigned, 16> MapFlagsArrayTy;

private:
  /// \brief Directive from where the map clauses were extracted.
  const OMPExecutableDirective &Directive;

  /// \brief Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// \brief Return the runtime size in bytes of the mapped entity. For an
  /// array section this is length * element-size (computed at runtime if the
  /// length is not constant); otherwise it is the static size of the
  /// expression's type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    auto ExprTy = E->getType().getCanonicalType();

    // Reference types are ignored for mapping purposes.
    if (auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression, that means we
      // are using the whole length of the base.
      if (!OAE->getLength() && OAE->getColonLoc().isValid())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (auto *PTy = BaseTy->getAs<PointerType>())
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      else {
        auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength())
        return ElemSize;

      auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
      LengthVal =
          CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
      return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// \brief Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested.
Add a flag marking the 4753 /// map as extra, meaning is not an argument of the kernel. 4754 unsigned getMapTypeBits(OpenMPMapClauseKind MapType, 4755 OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag, 4756 bool AddExtraFlag) const { 4757 unsigned Bits = 0u; 4758 switch (MapType) { 4759 case OMPC_MAP_alloc: 4760 Bits = OMP_MAP_ALLOC; 4761 break; 4762 case OMPC_MAP_to: 4763 Bits = OMP_MAP_TO; 4764 break; 4765 case OMPC_MAP_from: 4766 Bits = OMP_MAP_FROM; 4767 break; 4768 case OMPC_MAP_tofrom: 4769 Bits = OMP_MAP_TO | OMP_MAP_FROM; 4770 break; 4771 case OMPC_MAP_delete: 4772 Bits = OMP_MAP_DELETE; 4773 break; 4774 case OMPC_MAP_release: 4775 Bits = OMP_MAP_RELEASE; 4776 break; 4777 default: 4778 llvm_unreachable("Unexpected map type!"); 4779 break; 4780 } 4781 if (AddPtrFlag) 4782 Bits |= OMP_MAP_PTR; 4783 if (AddExtraFlag) 4784 Bits |= OMP_MAP_EXTRA; 4785 if (MapTypeModifier == OMPC_MAP_always) 4786 Bits |= OMP_MAP_ALWAYS; 4787 return Bits; 4788 } 4789 4790 /// \brief Return true if the provided expression is a final array section. A 4791 /// final array section, is one whose length can't be proved to be one. 4792 bool isFinalArraySectionExpression(const Expr *E) const { 4793 auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 4794 4795 // It is not an array section and therefore not a unity-size one. 4796 if (!OASE) 4797 return false; 4798 4799 // An array section with no colon always refer to a single element. 4800 if (OASE->getColonLoc().isInvalid()) 4801 return false; 4802 4803 auto *Length = OASE->getLength(); 4804 4805 // If we don't have a length we have to check if the array has size 1 4806 // for this dimension. Also, we should always expect a length if the 4807 // base type is pointer. 
    if (!Length) {
      auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                         OASE->getBase()->IgnoreParenImpCasts())
                         .getCanonicalType();
      if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's user fault.
      return true;
    }

    // Check if the length evaluates to 1.
    llvm::APSInt ConstLength;
    if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
      return true; // Can have more than size 1.

    return ConstLength.getSExtValue() != 1;
  }

  /// \brief Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      bool IsFirstComponentList) const {

    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), noflags
    //
    // map(i)
    // &i, &i, 100*sizeof(int), noflags
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), noflags
    //
    // map(p)
    // &p, &p, sizeof(float*), noflags
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), noflags
    //
    // map(s)
    // &s, &s, sizeof(S2), noflags
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), noflags
    //
    // map(s.s.f)
    // &s, &(s.s.f), 50*sizeof(float), noflags
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), noflags
    //
    // map(s.p[:22], s.a, s.b)
    // &s, &(s.p), sizeof(double*), noflags
    // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    //
    // map(s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag
    //
    // map(s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), noflags
    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), noflags
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), noflags
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), noflags
    //
    // map(ps->p)
    // ps, &(ps->p), sizeof(double*), noflags
    //
    // map(ps->p[:22])
    // ps, &(ps->p), sizeof(double*), noflags
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    //
    // map(ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag
    //
    // map(ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    //
    // map(ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), noflags
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag +
    // extra_flag

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    llvm::Value *BP = nullptr;

    if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.EmitScalarExpr(ME->getBase());
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression()))
               .getPointer();

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference.
      if (I->getAssociatedDeclaration()->getType()->isAnyPointerType() &&
          std::next(I) != CE) {
        auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(
            BP, I->getAssociatedDeclaration()->getType());
        BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(),
                                         I->getAssociatedDeclaration()
                                             ->getType()
                                             ->getAs<PointerType>())
                 .getPointer();

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    for (; I != CE; ++I) {
      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE &&
           OMPArraySectionExpr::getBaseOriginalType(OASE)
               .getCanonicalType()
               ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      if (Next == CE || IsPointer || IsFinalArraySection) {

        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        // Save the base we are currently using.
        BasePointers.push_back(BP);

        auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer();
        auto *Size = getExprTypeSize(I->getAssociatedExpression());

        Pointers.push_back(LB);
        Sizes.push_back(Size);
        // We need to add a pointer flag for each map that comes from the
        // same expression except for the first one. We need to add the extra
        // flag for each map that relates with the current capture, except for
        // the first one (there is a set of entries for each capture).
        Types.push_back(getMapTypeBits(MapType, MapTypeModifier,
                                       !IsExpressionFirstInfo,
                                       !IsCaptureFirstInfo));

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        continue;
      }
    }
  }

public:
  /// \brief Create a handler for the map clauses of \a Dir, emitting code in
  /// \a CGF.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : Directive(Dir), CGF(CGF) {}

  /// \brief Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions.
  void generateAllInfo(MapValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    BasePointers.clear();
    Pointers.clear();
    Sizes.clear();
    Types.clear();

    struct MapInfo {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      OpenMPMapClauseKind MapTypeModifier;
    };

    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::DenseMap<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
    for (auto *C : Directive.getClausesOfKind<OMPMapClause>())
      for (auto L : C->component_lists()) {
        const ValueDecl *VD =
            L.first ? cast<ValueDecl>(L.first->getCanonicalDecl()) : nullptr;
        Info[VD].push_back(
            {L.second, C->getMapType(), C->getMapTypeModifier()});
      }

    for (auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;
      for (MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");
        generateInfoForComponentList(L.MapType, L.MapTypeModifier, L.Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     IsFirstComponentList);
        IsFirstComponentList = false;
      }
    }
  }

  /// \brief Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              MapValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes,
                              MapFlagsArrayTy &Types) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    BasePointers.clear();
    Pointers.clear();
    Sizes.clear();
    Types.clear();

    // A null declaration stands for the 'this' capture.
    const ValueDecl *VD =
        Cap->capturesThis()
            ? nullptr
            : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl());

    // We need to know when we are generating information for the first
    // component associated with a capture, because the mapping flags depend
    // on it.
    bool IsFirstComponentList = true;
    for (auto *C : Directive.getClausesOfKind<OMPMapClause>())
      for (auto L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(),
                                     L.second, BasePointers, Pointers, Sizes,
                                     Types, IsFirstComponentList);
        IsFirstComponentList = false;
      }

    return;
  }
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// \brief Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// \brief Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void
emitOffloadingArrays(CodeGenFunction &CGF, llvm::Value *&BasePointersArray,
                     llvm::Value *&PointersArray, llvm::Value *&SizesArray,
                     llvm::Value *&MapTypesArray,
                     MappableExprsHandler::MapValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes) {
  auto &CGM = CGF.CGM;
  auto &Ctx = CGF.getContext();

  BasePointersArray = PointersArray = SizesArray = MapTypesArray = nullptr;

  if (unsigned PointerNumVal = BasePointers.size()) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (auto *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true);
    QualType PointerArrayType =
        Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0);

    BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (auto S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, ".offload_sizes");
      SizesArrayGbl->setUnnamedAddr(true);
      SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
5216 llvm::Constant *MapTypesArrayInit = 5217 llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); 5218 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 5219 CGM.getModule(), MapTypesArrayInit->getType(), 5220 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 5221 MapTypesArrayInit, ".offload_maptypes"); 5222 MapTypesArrayGbl->setUnnamedAddr(true); 5223 MapTypesArray = MapTypesArrayGbl; 5224 5225 for (unsigned i = 0; i < PointerNumVal; ++i) { 5226 llvm::Value *BPVal = BasePointers[i]; 5227 if (BPVal->getType()->isPointerTy()) 5228 BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy); 5229 else { 5230 assert(BPVal->getType()->isIntegerTy() && 5231 "If not a pointer, the value type must be an integer."); 5232 BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy); 5233 } 5234 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 5235 llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray, 5236 0, i); 5237 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 5238 CGF.Builder.CreateStore(BPVal, BPAddr); 5239 5240 llvm::Value *PVal = Pointers[i]; 5241 if (PVal->getType()->isPointerTy()) 5242 PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy); 5243 else { 5244 assert(PVal->getType()->isIntegerTy() && 5245 "If not a pointer, the value type must be an integer."); 5246 PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy); 5247 } 5248 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 5249 llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 0, 5250 i); 5251 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 5252 CGF.Builder.CreateStore(PVal, PAddr); 5253 5254 if (hasRuntimeEvaluationCaptureSize) { 5255 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 5256 llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, 5257 /*Idx0=*/0, 5258 /*Idx1=*/i); 5259 Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); 5260 CGF.Builder.CreateStore( 5261 
CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true), 5262 SAddr); 5263 } 5264 } 5265 } 5266 } 5267 /// \brief Emit the arguments to be passed to the runtime library based on the 5268 /// arrays of pointers, sizes and map types. 5269 static void emitOffloadingArraysArgument( 5270 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 5271 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 5272 llvm::Value *&MapTypesArrayArg, llvm::Value *BasePointersArray, 5273 llvm::Value *PointersArray, llvm::Value *SizesArray, 5274 llvm::Value *MapTypesArray, unsigned NumElems) { 5275 auto &CGM = CGF.CGM; 5276 if (NumElems) { 5277 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5278 llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), BasePointersArray, 5279 /*Idx0=*/0, /*Idx1=*/0); 5280 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5281 llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), PointersArray, 5282 /*Idx0=*/0, 5283 /*Idx1=*/0); 5284 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5285 llvm::ArrayType::get(CGM.SizeTy, NumElems), SizesArray, 5286 /*Idx0=*/0, /*Idx1=*/0); 5287 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 5288 llvm::ArrayType::get(CGM.Int32Ty, NumElems), MapTypesArray, 5289 /*Idx0=*/0, 5290 /*Idx1=*/0); 5291 } else { 5292 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 5293 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 5294 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); 5295 MapTypesArrayArg = 5296 llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()); 5297 } 5298 } 5299 5300 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, 5301 const OMPExecutableDirective &D, 5302 llvm::Value *OutlinedFn, 5303 llvm::Value *OutlinedFnID, 5304 const Expr *IfCond, const Expr *Device, 5305 ArrayRef<llvm::Value *> CapturedVars) { 5306 if (!CGF.HaveInsertPoint()) 5307 return; 5308 5309 assert(OutlinedFn && "Invalid 
outlined function!"); 5310 5311 auto &Ctx = CGF.getContext(); 5312 5313 // Fill up the arrays with all the captured variables. 5314 MappableExprsHandler::MapValuesArrayTy KernelArgs; 5315 MappableExprsHandler::MapValuesArrayTy BasePointers; 5316 MappableExprsHandler::MapValuesArrayTy Pointers; 5317 MappableExprsHandler::MapValuesArrayTy Sizes; 5318 MappableExprsHandler::MapFlagsArrayTy MapTypes; 5319 5320 MappableExprsHandler::MapValuesArrayTy CurBasePointers; 5321 MappableExprsHandler::MapValuesArrayTy CurPointers; 5322 MappableExprsHandler::MapValuesArrayTy CurSizes; 5323 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 5324 5325 // Get map clause information. 5326 MappableExprsHandler MCHandler(D, CGF); 5327 5328 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 5329 auto RI = CS.getCapturedRecordDecl()->field_begin(); 5330 auto CV = CapturedVars.begin(); 5331 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 5332 CE = CS.capture_end(); 5333 CI != CE; ++CI, ++RI, ++CV) { 5334 StringRef Name; 5335 QualType Ty; 5336 5337 CurBasePointers.clear(); 5338 CurPointers.clear(); 5339 CurSizes.clear(); 5340 CurMapTypes.clear(); 5341 5342 // VLA sizes are passed to the outlined region by copy and do not have map 5343 // information associated. 5344 if (CI->capturesVariableArrayType()) { 5345 CurBasePointers.push_back(*CV); 5346 CurPointers.push_back(*CV); 5347 CurSizes.push_back(CGF.getTypeSize(RI->getType())); 5348 // Copy to the device as an argument. No need to retrieve it. 5349 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_BYCOPY); 5350 } else { 5351 // If we have any information in the map clause, we use it, otherwise we 5352 // just do a default mapping. 5353 MCHandler.generateInfoForCapture(CI, CurBasePointers, CurPointers, 5354 CurSizes, CurMapTypes); 5355 5356 if (CurBasePointers.empty()) { 5357 // Do the default mapping. 
5358 if (CI->capturesThis()) { 5359 CurBasePointers.push_back(*CV); 5360 CurPointers.push_back(*CV); 5361 const PointerType *PtrTy = 5362 cast<PointerType>(RI->getType().getTypePtr()); 5363 CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType())); 5364 // Default map type. 5365 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_TO | 5366 MappableExprsHandler::OMP_MAP_FROM); 5367 } else if (CI->capturesVariableByCopy()) { 5368 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_BYCOPY); 5369 if (!RI->getType()->isAnyPointerType()) { 5370 // If the field is not a pointer, we need to save the actual value 5371 // and 5372 // load it as a void pointer. 5373 auto DstAddr = CGF.CreateMemTemp( 5374 Ctx.getUIntPtrType(), 5375 Twine(CI->getCapturedVar()->getName()) + ".casted"); 5376 LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType()); 5377 5378 auto *SrcAddrVal = CGF.EmitScalarConversion( 5379 DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()), 5380 Ctx.getPointerType(RI->getType()), SourceLocation()); 5381 LValue SrcLV = 5382 CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, RI->getType()); 5383 5384 // Store the value using the source type pointer. 5385 CGF.EmitStoreThroughLValue(RValue::get(*CV), SrcLV); 5386 5387 // Load the value using the destination type pointer. 
5388 CurBasePointers.push_back( 5389 CGF.EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal()); 5390 CurPointers.push_back(CurBasePointers.back()); 5391 } else { 5392 CurBasePointers.push_back(*CV); 5393 CurPointers.push_back(*CV); 5394 } 5395 CurSizes.push_back(CGF.getTypeSize(RI->getType())); 5396 } else { 5397 assert(CI->capturesVariable() && "Expected captured reference."); 5398 CurBasePointers.push_back(*CV); 5399 CurPointers.push_back(*CV); 5400 5401 const ReferenceType *PtrTy = 5402 cast<ReferenceType>(RI->getType().getTypePtr()); 5403 QualType ElementType = PtrTy->getPointeeType(); 5404 CurSizes.push_back(CGF.getTypeSize(ElementType)); 5405 // The default map type for a scalar/complex type is 'to' because by 5406 // default the value doesn't have to be retrieved. For an aggregate 5407 // type, 5408 // the default is 'tofrom'. 5409 CurMapTypes.push_back(ElementType->isAggregateType() 5410 ? (MappableExprsHandler::OMP_MAP_TO | 5411 MappableExprsHandler::OMP_MAP_FROM) 5412 : MappableExprsHandler::OMP_MAP_TO); 5413 } 5414 } 5415 } 5416 // We expect to have at least an element of information for this capture. 5417 assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!"); 5418 assert(CurBasePointers.size() == CurPointers.size() && 5419 CurBasePointers.size() == CurSizes.size() && 5420 CurBasePointers.size() == CurMapTypes.size() && 5421 "Inconsistent map information sizes!"); 5422 5423 // The kernel args are always the first elements of the base pointers 5424 // associated with a capture. 5425 KernelArgs.push_back(CurBasePointers.front()); 5426 // We need to append the results of this capture to what we already have. 
    BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
    Pointers.append(CurPointers.begin(), CurPointers.end());
    Sizes.append(CurSizes.begin(), CurSizes.end());
    MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
  }

  // Keep track of whether the host function has to be executed.
  auto OffloadErrorQType =
      Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
  auto OffloadError = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
      OffloadErrorQType);
  CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
                        OffloadError);

  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, Device,
                    OutlinedFnID, OffloadError, OffloadErrorQType,
                    &D](CodeGenFunction &CGF, PrePostActionTy &) {
    auto &RT = CGF.CGM.getOpenMPRuntime();
    // Emit the offloading arrays.
    llvm::Value *BasePointersArray;
    llvm::Value *PointersArray;
    llvm::Value *SizesArray;
    llvm::Value *MapTypesArray;
    emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray,
                         MapTypesArray, BasePointers, Pointers, Sizes,
                         MapTypes);
    emitOffloadingArraysArgument(CGF, BasePointersArray, PointersArray,
                                 SizesArray, MapTypesArray, BasePointersArray,
                                 PointersArray, SizesArray, MapTypesArray,
                                 BasePointers.size());

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int32Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size());

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D);
    auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D);

    // If we have NumTeams defined this means that we have an enclosed teams
    // region. Therefore we also expect to have ThreadLimit defined. These two
    // values should be defined in the presence of a teams directive,
    // regardless of having any clauses associated. If the user is using teams
    // but no clauses, these two values will be the default that should be
    // passed to the runtime library - a 32-bit integer with the value zero.
    if (NumTeams) {
      assert(ThreadLimit && "Thread limit expression should be available along "
                            "with number of teams.");
      llvm::Value *OffloadingArgs[] = {
          DeviceID,          OutlinedFnID,  PointerNum,
          BasePointersArray, PointersArray, SizesArray,
          MapTypesArray,     NumTeams,      ThreadLimit};
      Return = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {
          DeviceID,      OutlinedFnID, PointerNum, BasePointersArray,
          PointersArray, SizesArray,   MapTypesArray};
      Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target),
                                   OffloadingArgs);
    }

    CGF.EmitStoreOfScalar(Return, OffloadError);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u),
                          OffloadError);
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on
  // host regardless of the conditional in the if clause if, e.g., the user
  // does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond)
      emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
    else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(ElseGen);
    ElseRCG(CGF);
  }

  // Check the error code and execute the host version if required.
5538 auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed"); 5539 auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont"); 5540 auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation()); 5541 auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal); 5542 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 5543 5544 CGF.EmitBlock(OffloadFailedBlock); 5545 CGF.Builder.CreateCall(OutlinedFn, KernelArgs); 5546 CGF.EmitBranch(OffloadContBlock); 5547 5548 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 5549 } 5550 5551 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 5552 StringRef ParentName) { 5553 if (!S) 5554 return; 5555 5556 // If we find a OMP target directive, codegen the outline function and 5557 // register the result. 5558 // FIXME: Add other directives with target when they become supported. 5559 bool isTargetDirective = isa<OMPTargetDirective>(S); 5560 5561 if (isTargetDirective) { 5562 auto *E = cast<OMPExecutableDirective>(S); 5563 unsigned DeviceID; 5564 unsigned FileID; 5565 unsigned Line; 5566 getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID, 5567 FileID, Line); 5568 5569 // Is this a target region that should not be emitted as an entry point? If 5570 // so just signal we are done with this target region. 
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Emit the outlined device function; registration happens as part of the
    // emission (isOffloadEntry=true).
    llvm::Function *Fn;
    llvm::Constant *Addr;
    std::tie(Fn, Addr) =
        CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
            CGM, cast<OMPTargetDirective>(*E), ParentName,
            /*isOffloadEntry=*/true);
    assert(Fn && Addr && "Target region emission failed.");
    return;
  }

  // Other OpenMP directives are not themselves emitted here; recurse into
  // their associated (captured) statement looking for nested target regions.
  if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(),
        ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (auto *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

/// Decide whether function \p GD must be emitted by the OpenMP device
/// codegen. Returns true when the function has been fully handled here.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  auto &FD = *cast<FunctionDecl>(GD.getDecl());

  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Try to detect target regions in the function.
  scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD));

  // We should not emit any function other than the ones created during the
  // scanning. Therefore, we signal that this function is completely dealt
  // with.
  return true;
}

/// Decide whether global variable \p GD must be emitted by the OpenMP device
/// codegen. Returns true when GD has been fully handled here.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  // Arrays of records are scanned through their base element type.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    // Scan every constructor body; the complete-object variant provides the
    // parent name used in the offload entry mangling.
    for (auto *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    auto *Dtor = RD->getDestructor();
    if (Dtor) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // If we are in target mode we do not emit any global (declare target is not
  // implemented yet). Therefore we signal that GD was processed in this case.
  return true;
}

/// Dispatch device-side emission of \p GD to the function or global-variable
/// handler. Returns true when GD was fully handled by the OpenMP codegen.
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  auto *VD = GD.getDecl();
  if (isa<FunctionDecl>(VD))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

/// Emit the offload entries and metadata for this module and build the
/// function that registers the offloading descriptor with the runtime.
llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
  // If we have offloading in the current module, we need to emit the entries
  // now and register the offloading descriptor.
  createOffloadEntriesAndInfoMetadata();

  // Create and register the offloading binary descriptors. This is the main
  // entity that captures all the information about offloading in the current
  // compilation unit.
  return createOffloadingBinaryDescriptorRegistration();
}

/// Emit a call to __kmpc_fork_teams that runs \p OutlinedFn with
/// \p CapturedVars as the microtask arguments.
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Value *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  auto *RTLoc = emitUpdateLocation(CGF, Loc);
  // Run cleanups for any temporaries created while materializing the
  // captured variables before the fork call returns.
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

/// Emit a call to __kmpc_push_num_teams with the evaluated num_teams and
/// thread_limit clause expressions (0 is passed for an absent clause).
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  auto *RTLoc = emitUpdateLocation(CGF, Loc);

  // A missing clause is communicated to the runtime as 0.
  llvm::Value *NumTeamsVal =
      (NumTeams)
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      (ThreadLimit)
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams,
  // thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

/// Emit the begin/end runtime calls for a 'target data' region. The region
/// body \p CodeGen runs between __tgt_target_data_begin and
/// __tgt_target_data_end; \p IfCond guards the runtime calls only.
void CGOpenMPRuntime::emitTargetDataCalls(CodeGenFunction &CGF,
                                          const OMPExecutableDirective &D,
                                          const Expr *IfCond,
                                          const Expr *Device,
                                          const RegionCodeGenTy &CodeGen) {

  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *BasePointersArray = nullptr;
  llvm::Value *PointersArray = nullptr;
  llvm::Value *SizesArray = nullptr;
  llvm::Value *MapTypesArray = nullptr;
  unsigned NumOfPtrs = 0;

  // Generate the code for the opening of the data environment. Capture all
  // the arguments of the runtime call by reference because they are used in
  // the closing of the region.
  auto &&BeginThenGen = [&D, &CGF, &BasePointersArray, &PointersArray,
                         &SizesArray, &MapTypesArray, Device,
                         &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
    NumOfPtrs = BasePointers.size();

    // Fill up the arrays and create the arguments.
    // The arrays (and NumOfPtrs) are stored in the enclosing function's
    // locals, captured by reference, so EndThenGen can reuse them.
    emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray,
                         MapTypesArray, BasePointers, Pointers, Sizes,
                         MapTypes);

    // Decay the arrays into the pointer arguments the runtime expects.
    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 BasePointersArray, PointersArray, SizesArray,
                                 MapTypesArray, NumOfPtrs);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int32Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);

    // Emit the number of elements in the offloading arrays.
    auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    auto &RT = CGF.CGM.getOpenMPRuntime();
    CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);
  };

  // Generate code for the closing of the data region.
  // The closing call reuses the arrays built by BeginThenGen (captured by
  // reference); it must therefore only run after BeginThenGen has executed.
  auto &&EndThenGen = [&CGF, &BasePointersArray, &PointersArray, &SizesArray,
                       &MapTypesArray, Device,
                       &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) {
    assert(BasePointersArray && PointersArray && SizesArray && MapTypesArray &&
           NumOfPtrs && "Invalid data environment closing arguments.");

    // Decay the previously created arrays into the pointer arguments the
    // runtime expects.
    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 BasePointersArray, PointersArray, SizesArray,
                                 MapTypesArray, NumOfPtrs);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int32Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);

    // Emit the number of elements in the offloading arrays.
    auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    auto &RT = CGF.CGM.getOpenMPRuntime();
    CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // In the event we get an if clause, we don't have to take any action on the
  // else side.
5821 auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 5822 5823 if (IfCond) { 5824 emitOMPIfClause(CGF, IfCond, BeginThenGen, ElseGen); 5825 } else { 5826 RegionCodeGenTy BeginThenRCG(BeginThenGen); 5827 BeginThenRCG(CGF); 5828 } 5829 5830 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, CodeGen); 5831 5832 if (IfCond) { 5833 emitOMPIfClause(CGF, IfCond, EndThenGen, ElseGen); 5834 } else { 5835 RegionCodeGenTy EndThenRCG(EndThenGen); 5836 EndThenRCG(CGF); 5837 } 5838 } 5839 5840 void CGOpenMPRuntime::emitTargetEnterOrExitDataCall( 5841 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 5842 const Expr *Device) { 5843 if (!CGF.HaveInsertPoint()) 5844 return; 5845 5846 assert((isa<OMPTargetEnterDataDirective>(D) || 5847 isa<OMPTargetExitDataDirective>(D)) && 5848 "Expecting either target enter or exit data directives."); 5849 5850 // Generate the code for the opening of the data environment. 5851 auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) { 5852 // Fill up the arrays with all the mapped variables. 5853 MappableExprsHandler::MapValuesArrayTy BasePointers; 5854 MappableExprsHandler::MapValuesArrayTy Pointers; 5855 MappableExprsHandler::MapValuesArrayTy Sizes; 5856 MappableExprsHandler::MapFlagsArrayTy MapTypes; 5857 5858 // Get map clause information. 5859 MappableExprsHandler MCHandler(D, CGF); 5860 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 5861 5862 llvm::Value *BasePointersArrayArg = nullptr; 5863 llvm::Value *PointersArrayArg = nullptr; 5864 llvm::Value *SizesArrayArg = nullptr; 5865 llvm::Value *MapTypesArrayArg = nullptr; 5866 5867 // Fill up the arrays and create the arguments. 
5868 emitOffloadingArrays(CGF, BasePointersArrayArg, PointersArrayArg, 5869 SizesArrayArg, MapTypesArrayArg, BasePointers, 5870 Pointers, Sizes, MapTypes); 5871 emitOffloadingArraysArgument( 5872 CGF, BasePointersArrayArg, PointersArrayArg, SizesArrayArg, 5873 MapTypesArrayArg, BasePointersArrayArg, PointersArrayArg, SizesArrayArg, 5874 MapTypesArrayArg, BasePointers.size()); 5875 5876 // Emit device ID if any. 5877 llvm::Value *DeviceID = nullptr; 5878 if (Device) 5879 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 5880 CGF.Int32Ty, /*isSigned=*/true); 5881 else 5882 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 5883 5884 // Emit the number of elements in the offloading arrays. 5885 auto *PointerNum = CGF.Builder.getInt32(BasePointers.size()); 5886 5887 llvm::Value *OffloadingArgs[] = { 5888 DeviceID, PointerNum, BasePointersArrayArg, 5889 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 5890 auto &RT = CGF.CGM.getOpenMPRuntime(); 5891 CGF.EmitRuntimeCall( 5892 RT.createRuntimeFunction(isa<OMPTargetEnterDataDirective>(D) 5893 ? OMPRTL__tgt_target_data_begin 5894 : OMPRTL__tgt_target_data_end), 5895 OffloadingArgs); 5896 }; 5897 5898 // In the event we get an if clause, we don't have to take any action on the 5899 // else side. 5900 auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 5901 5902 if (IfCond) { 5903 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 5904 } else { 5905 RegionCodeGenTy ThenGenRCG(ThenGen); 5906 ThenGenRCG(CGF); 5907 } 5908 } 5909