//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;

namespace {
/// \brief Base class for handling code generation inside OpenMP regions.
/// Records which kind of region is being emitted, the directive that created
/// it, and the code-generation callback that emits the region body.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// \brief Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// \brief Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// \brief Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// \brief Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// \brief Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// \brief Constructor for regions backed by a captured statement \p CS.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// \brief Constructor for regions with no associated captured statement
  /// (used by inlined regions, which reuse the enclosing capture).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// \brief Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// \brief Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// \brief Get the kind of this region (outlined/task/inlined/target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// \brief Get the OpenMP directive kind that created this region.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// \brief Whether this region contains a 'cancel' construct.
  bool hasCancel() const { return HasCancel; }

  /// \brief LLVM-style RTTI: any captured-stmt info with kind CR_OpenMP is
  /// one of the CGOpenMPRegionInfo subclasses.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

protected:
  /// \brief Kind of this region; see CGOpenMPRegionKind.
  CGOpenMPRegionKind RegionKind;
  /// \brief Callback that emits the body of the region.
  RegionCodeGenTy CodeGen;
  /// \brief OpenMP directive that created this region.
  OpenMPDirectiveKind Kind;
  /// \brief True if the region contains a 'cancel' construct.
  bool HasCancel;
};

/// \brief API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \brief Region for a standalone 'parallel' directive; \p ThreadIDVar is
  /// the outlined function's parameter holding the global thread id.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// \brief A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
};

/// \brief API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \brief Region for a standalone 'task' directive; \p ThreadIDVar is the
  /// local variable holding the global thread id (not a pointer parameter,
  /// hence the overridden getThreadIDVariableLValue below).
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// \brief Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// \brief A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
};

/// \brief API for inlined captured statement code generation in OpenMP
/// constructs. Delegates most queries to the enclosing (outer) region info,
/// when one exists.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  /// \brief Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// \brief Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  /// \brief Get the captured-statement info that was active before this
  /// inlined region was entered (restored by InlinedOpenMPRegionRAII).
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

private:
  /// \brief CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// \brief OldCSI downcast to CGOpenMPRegionInfo, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// \brief API for captured statement code generation in OpenMP target
/// constructs. For this captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// \brief This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// \brief Client-supplied, application-unique name of the target region.
  StringRef HelperName;
};

/// \brief Placeholder codegen callback for regions that must never emit a
/// body (used by CGOpenMPInnerExprInfo below).
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// \brief API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals/parameters are already local; only globals need privatizing.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      SourceLocation());
      PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
        return CGF.EmitLValue(&DRE).getAddress();
      });
    }
    (void)PrivScope.Privatize();
  }

  /// \brief Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// \brief Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// \brief RAII for emitting code of OpenMP constructs. Installs a
/// CGOpenMPInlinedRegionInfo on construction and restores the previous
/// CapturedStmtInfo on destruction.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;

public:
  /// \brief Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
  }
};

/// \brief Values for bit flags used in the ident_t to describe the fields.
346 /// All enumeric elements are named and described in accordance with the code 347 /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h 348 enum OpenMPLocationFlags { 349 /// \brief Use trampoline for internal microtask. 350 OMP_IDENT_IMD = 0x01, 351 /// \brief Use c-style ident structure. 352 OMP_IDENT_KMPC = 0x02, 353 /// \brief Atomic reduction option for kmpc_reduce. 354 OMP_ATOMIC_REDUCE = 0x10, 355 /// \brief Explicit 'barrier' directive. 356 OMP_IDENT_BARRIER_EXPL = 0x20, 357 /// \brief Implicit barrier in code. 358 OMP_IDENT_BARRIER_IMPL = 0x40, 359 /// \brief Implicit barrier in 'for' directive. 360 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 361 /// \brief Implicit barrier in 'sections' directive. 362 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 363 /// \brief Implicit barrier in 'single' directive. 364 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140 365 }; 366 367 /// \brief Describes ident structure that describes a source location. 368 /// All descriptions are taken from 369 /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h 370 /// Original structure: 371 /// typedef struct ident { 372 /// kmp_int32 reserved_1; /**< might be used in Fortran; 373 /// see above */ 374 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 375 /// KMP_IDENT_KMPC identifies this union 376 /// member */ 377 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 378 /// see above */ 379 ///#if USE_ITT_BUILD 380 /// /* but currently used for storing 381 /// region-specific ITT */ 382 /// /* contextual information. */ 383 ///#endif /* USE_ITT_BUILD */ 384 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 385 /// C++ */ 386 /// char const *psource; /**< String describing the source location. 387 /// The string is composed of semi-colon separated 388 // fields which describe the source file, 389 /// the function and a pair of line numbers that 390 /// delimit the construct. 
391 /// */ 392 /// } ident_t; 393 enum IdentFieldIndex { 394 /// \brief might be used in Fortran 395 IdentField_Reserved_1, 396 /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 397 IdentField_Flags, 398 /// \brief Not really used in Fortran any more 399 IdentField_Reserved_2, 400 /// \brief Source[4] in Fortran, do not use for C++ 401 IdentField_Reserved_3, 402 /// \brief String describing the source location. The string is composed of 403 /// semi-colon separated fields which describe the source file, the function 404 /// and a pair of line numbers that delimit the construct. 405 IdentField_PSource 406 }; 407 408 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from 409 /// the enum sched_type in kmp.h). 410 enum OpenMPSchedType { 411 /// \brief Lower bound for default (unordered) versions. 412 OMP_sch_lower = 32, 413 OMP_sch_static_chunked = 33, 414 OMP_sch_static = 34, 415 OMP_sch_dynamic_chunked = 35, 416 OMP_sch_guided_chunked = 36, 417 OMP_sch_runtime = 37, 418 OMP_sch_auto = 38, 419 /// \brief Lower bound for 'ordered' versions. 
420 OMP_ord_lower = 64, 421 OMP_ord_static_chunked = 65, 422 OMP_ord_static = 66, 423 OMP_ord_dynamic_chunked = 67, 424 OMP_ord_guided_chunked = 68, 425 OMP_ord_runtime = 69, 426 OMP_ord_auto = 70, 427 OMP_sch_default = OMP_sch_static, 428 /// \brief dist_schedule types 429 OMP_dist_sch_static_chunked = 91, 430 OMP_dist_sch_static = 92, 431 }; 432 433 enum OpenMPRTLFunction { 434 /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, 435 /// kmpc_micro microtask, ...); 436 OMPRTL__kmpc_fork_call, 437 /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc, 438 /// kmp_int32 global_tid, void *data, size_t size, void ***cache); 439 OMPRTL__kmpc_threadprivate_cached, 440 /// \brief Call to void __kmpc_threadprivate_register( ident_t *, 441 /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 442 OMPRTL__kmpc_threadprivate_register, 443 // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); 444 OMPRTL__kmpc_global_thread_num, 445 // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 446 // kmp_critical_name *crit); 447 OMPRTL__kmpc_critical, 448 // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 449 // global_tid, kmp_critical_name *crit, uintptr_t hint); 450 OMPRTL__kmpc_critical_with_hint, 451 // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 452 // kmp_critical_name *crit); 453 OMPRTL__kmpc_end_critical, 454 // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 455 // global_tid); 456 OMPRTL__kmpc_cancel_barrier, 457 // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 458 OMPRTL__kmpc_barrier, 459 // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 460 OMPRTL__kmpc_for_static_fini, 461 // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 462 // global_tid); 463 OMPRTL__kmpc_serialized_parallel, 464 // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 465 // global_tid); 466 
OMPRTL__kmpc_end_serialized_parallel, 467 // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 468 // kmp_int32 num_threads); 469 OMPRTL__kmpc_push_num_threads, 470 // Call to void __kmpc_flush(ident_t *loc); 471 OMPRTL__kmpc_flush, 472 // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid); 473 OMPRTL__kmpc_master, 474 // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); 475 OMPRTL__kmpc_end_master, 476 // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 477 // int end_part); 478 OMPRTL__kmpc_omp_taskyield, 479 // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); 480 OMPRTL__kmpc_single, 481 // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); 482 OMPRTL__kmpc_end_single, 483 // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 484 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 485 // kmp_routine_entry_t *task_entry); 486 OMPRTL__kmpc_omp_task_alloc, 487 // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * 488 // new_task); 489 OMPRTL__kmpc_omp_task, 490 // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 491 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 492 // kmp_int32 didit); 493 OMPRTL__kmpc_copyprivate, 494 // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 495 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 496 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 497 OMPRTL__kmpc_reduce, 498 // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 499 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 500 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 501 // *lck); 502 OMPRTL__kmpc_reduce_nowait, 503 // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 504 // kmp_critical_name *lck); 505 OMPRTL__kmpc_end_reduce, 
506 // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 507 // kmp_critical_name *lck); 508 OMPRTL__kmpc_end_reduce_nowait, 509 // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 510 // kmp_task_t * new_task); 511 OMPRTL__kmpc_omp_task_begin_if0, 512 // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 513 // kmp_task_t * new_task); 514 OMPRTL__kmpc_omp_task_complete_if0, 515 // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 516 OMPRTL__kmpc_ordered, 517 // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 518 OMPRTL__kmpc_end_ordered, 519 // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 520 // global_tid); 521 OMPRTL__kmpc_omp_taskwait, 522 // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 523 OMPRTL__kmpc_taskgroup, 524 // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 525 OMPRTL__kmpc_end_taskgroup, 526 // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 527 // int proc_bind); 528 OMPRTL__kmpc_push_proc_bind, 529 // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 530 // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t 531 // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 532 OMPRTL__kmpc_omp_task_with_deps, 533 // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 534 // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 535 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 536 OMPRTL__kmpc_omp_wait_deps, 537 // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 538 // global_tid, kmp_int32 cncl_kind); 539 OMPRTL__kmpc_cancellationpoint, 540 // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 541 // kmp_int32 cncl_kind); 542 OMPRTL__kmpc_cancel, 543 // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 544 // kmp_int32 num_teams, 
kmp_int32 thread_limit); 545 OMPRTL__kmpc_push_num_teams, 546 /// \brief Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, 547 /// kmpc_micro microtask, ...); 548 OMPRTL__kmpc_fork_teams, 549 550 // 551 // Offloading related calls 552 // 553 // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t 554 // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t 555 // *arg_types); 556 OMPRTL__tgt_target, 557 // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, 558 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, 559 // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); 560 OMPRTL__tgt_target_teams, 561 // Call to void __tgt_register_lib(__tgt_bin_desc *desc); 562 OMPRTL__tgt_register_lib, 563 // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); 564 OMPRTL__tgt_unregister_lib, 565 }; 566 567 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 568 /// region. 569 class CleanupTy final : public EHScopeStack::Cleanup { 570 PrePostActionTy *Action; 571 572 public: 573 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 574 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 575 if (!CGF.HaveInsertPoint()) 576 return; 577 Action->Exit(CGF); 578 } 579 }; 580 581 } // anonymous namespace 582 583 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 584 CodeGenFunction::RunCleanupsScope Scope(CGF); 585 if (PrePostAction) { 586 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 587 Callback(CodeGen, CGF, *PrePostAction); 588 } else { 589 PrePostActionTy Action; 590 Callback(CodeGen, CGF, Action); 591 } 592 } 593 594 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 595 return CGF.EmitLoadOfPointerLValue( 596 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 597 getThreadIDVariable()->getType()->castAs<PointerType>()); 598 } 599 600 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction 
&CGF, const Stmt * /*S*/) { 601 if (!CGF.HaveInsertPoint()) 602 return; 603 // 1.2.2 OpenMP Language Terminology 604 // Structured block - An executable statement with a single entry at the 605 // top and a single exit at the bottom. 606 // The point of exit cannot be a branch out of the structured block. 607 // longjmp() and throw() must not violate the entry/exit criteria. 608 CGF.EHStack.pushTerminate(); 609 CodeGen(CGF); 610 CGF.EHStack.popTerminate(); 611 } 612 613 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 614 CodeGenFunction &CGF) { 615 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 616 getThreadIDVariable()->getType(), 617 AlignmentSource::Decl); 618 } 619 620 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) 621 : CGM(CGM), OffloadEntriesInfoManager(CGM) { 622 IdentTy = llvm::StructType::create( 623 "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, 624 CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, 625 CGM.Int8PtrTy /* psource */, nullptr); 626 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 627 628 loadOffloadInfoMetadata(); 629 } 630 631 void CGOpenMPRuntime::clear() { 632 InternalVars.clear(); 633 } 634 635 static llvm::Function * 636 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 637 const Expr *CombinerInitializer, const VarDecl *In, 638 const VarDecl *Out, bool IsCombiner) { 639 // void .omp_combiner.(Ty *in, Ty *out); 640 auto &C = CGM.getContext(); 641 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 642 FunctionArgList Args; 643 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 644 /*Id=*/nullptr, PtrTy); 645 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 646 /*Id=*/nullptr, PtrTy); 647 Args.push_back(&OmpOutParm); 648 Args.push_back(&OmpInParm); 649 auto &FnInfo = 650 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 651 auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 
652 auto *Fn = llvm::Function::Create( 653 FnTy, llvm::GlobalValue::InternalLinkage, 654 IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule()); 655 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); 656 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 657 CodeGenFunction CGF(CGM); 658 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 659 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 660 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); 661 CodeGenFunction::OMPPrivateScope Scope(CGF); 662 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 663 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address { 664 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 665 .getAddress(); 666 }); 667 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 668 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address { 669 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 670 .getAddress(); 671 }); 672 (void)Scope.Privatize(); 673 CGF.EmitIgnoredExpr(CombinerInitializer); 674 Scope.ForceCleanup(); 675 CGF.FinishFunction(); 676 return Fn; 677 } 678 679 void CGOpenMPRuntime::emitUserDefinedReduction( 680 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 681 if (UDRMap.count(D) > 0) 682 return; 683 auto &C = CGM.getContext(); 684 if (!In || !Out) { 685 In = &C.Idents.get("omp_in"); 686 Out = &C.Idents.get("omp_out"); 687 } 688 llvm::Function *Combiner = emitCombinerOrInitializer( 689 CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()), 690 cast<VarDecl>(D->lookup(Out).front()), 691 /*IsCombiner=*/true); 692 llvm::Function *Initializer = nullptr; 693 if (auto *Init = D->getInitializer()) { 694 if (!Priv || !Orig) { 695 Priv = &C.Idents.get("omp_priv"); 696 Orig = &C.Idents.get("omp_orig"); 697 } 698 Initializer = emitCombinerOrInitializer( 699 CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()), 700 
cast<VarDecl>(D->lookup(Priv).front()), 701 /*IsCombiner=*/false); 702 } 703 UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer))); 704 if (CGF) { 705 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 706 Decls.second.push_back(D); 707 } 708 } 709 710 std::pair<llvm::Function *, llvm::Function *> 711 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 712 auto I = UDRMap.find(D); 713 if (I != UDRMap.end()) 714 return I->second; 715 emitUserDefinedReduction(/*CGF=*/nullptr, D); 716 return UDRMap.lookup(D); 717 } 718 719 // Layout information for ident_t. 720 static CharUnits getIdentAlign(CodeGenModule &CGM) { 721 return CGM.getPointerAlign(); 722 } 723 static CharUnits getIdentSize(CodeGenModule &CGM) { 724 assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign())); 725 return CharUnits::fromQuantity(16) + CGM.getPointerSize(); 726 } 727 static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) { 728 // All the fields except the last are i32, so this works beautifully. 
729 return unsigned(Field) * CharUnits::fromQuantity(4); 730 } 731 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr, 732 IdentFieldIndex Field, 733 const llvm::Twine &Name = "") { 734 auto Offset = getOffsetOfIdentField(Field); 735 return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name); 736 } 737 738 llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( 739 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 740 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 741 assert(ThreadIDVar->getType()->isPointerType() && 742 "thread id variable must be of type kmp_int32 *"); 743 const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 744 CodeGenFunction CGF(CGM, true); 745 bool HasCancel = false; 746 if (auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 747 HasCancel = OPD->hasCancel(); 748 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 749 HasCancel = OPSD->hasCancel(); 750 else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 751 HasCancel = OPFD->hasCancel(); 752 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 753 HasCancel); 754 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 755 return CGF.GenerateOpenMPCapturedStmtFunction(*CS); 756 } 757 758 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( 759 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 760 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 761 assert(!ThreadIDVar->getType()->isPointerType() && 762 "thread id variable must be of type kmp_int32 for tasks"); 763 auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 764 CodeGenFunction CGF(CGM, true); 765 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 766 InnermostKind, 767 cast<OMPTaskDirective>(D).hasCancel()); 768 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 769 return CGF.GenerateCapturedStmtFunction(*CS); 770 } 771 772 Address 
CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 773 CharUnits Align = getIdentAlign(CGM); 774 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); 775 if (!Entry) { 776 if (!DefaultOpenMPPSource) { 777 // Initialize default location for psource field of ident_t structure of 778 // all ident_t objects. Format is ";file;function;line;column;;". 779 // Taken from 780 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c 781 DefaultOpenMPPSource = 782 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 783 DefaultOpenMPPSource = 784 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 785 } 786 auto DefaultOpenMPLocation = new llvm::GlobalVariable( 787 CGM.getModule(), IdentTy, /*isConstant*/ true, 788 llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr); 789 DefaultOpenMPLocation->setUnnamedAddr(true); 790 DefaultOpenMPLocation->setAlignment(Align.getQuantity()); 791 792 llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true); 793 llvm::Constant *Values[] = {Zero, 794 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 795 Zero, Zero, DefaultOpenMPPSource}; 796 llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values); 797 DefaultOpenMPLocation->setInitializer(Init); 798 OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation; 799 } 800 return Address(Entry, Align); 801 } 802 803 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 804 SourceLocation Loc, 805 unsigned Flags) { 806 Flags |= OMP_IDENT_KMPC; 807 // If no debug info is generated - return global default location. 
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  // Reuse the per-function ident_t alloca if one was already created.
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
                                      ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Seed the alloca from the default ident_t in the entry block, so every
    // use in the function starts from an initialized structure.
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGM.getSize(getIdentSize(CGF.CGM)));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);

  // The ";file;function;line;column;;" string is built once per source
  // location and cached by the location's raw encoding.
  auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const FunctionDecl *FD =
            dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
      OS2 << FD->getQualifiedNameAsString();
    }
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.Builder.CreateStore(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}

/// Returns the global thread id (gtid) for the current function, loading it
/// from the outlined function's thread-id argument when available and
/// otherwise calling __kmpc_global_thread_num. The result is cached in
/// OpenMPLocThreadIDMap per function where safe.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
      // If value loaded in entry block, cache it and use it everywhere in
      // function.
      if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
        auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
        Elem.second.ThreadID = ThreadID;
      }
      return ThreadID;
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
894 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 895 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 896 ThreadID = 897 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 898 emitUpdateLocation(CGF, Loc)); 899 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 900 Elem.second.ThreadID = ThreadID; 901 return ThreadID; 902 } 903 904 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 905 assert(CGF.CurFn && "No function in current CodeGenFunction."); 906 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 907 OpenMPLocThreadIDMap.erase(CGF.CurFn); 908 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 909 for(auto *D : FunctionUDRMap[CGF.CurFn]) { 910 UDRMap.erase(D); 911 } 912 FunctionUDRMap.erase(CGF.CurFn); 913 } 914 } 915 916 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 917 if (!IdentTy) { 918 } 919 return llvm::PointerType::getUnqual(IdentTy); 920 } 921 922 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 923 if (!Kmpc_MicroTy) { 924 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

/// Returns (creating if needed) the declaration of the requested OpenMP
/// runtime entry point. Each case builds the exact C signature expected by
/// libomp/libomptarget and declares it in the module.
llvm::Constant *
CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
  llvm::Constant *RTLFn = nullptr;
  switch (static_cast<OpenMPRTLFunction>(Function)) {
  case OMPRTL__kmpc_fork_call: {
    // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
    // microtask, ...);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                getKmpc_MicroPointerTy()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
    break;
  }
  case OMPRTL__kmpc_global_thread_num: {
    // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
    break;
  }
  case OMPRTL__kmpc_threadprivate_cached: {
    // Build void *__kmpc_threadprivate_cached(ident_t *loc,
    // kmp_int32 global_tid, void *data, size_t size, void ***cache);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy, CGM.SizeTy,
                                CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
    break;
  }
  case OMPRTL__kmpc_critical: {
    // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
    break;
  }
  case OMPRTL__kmpc_critical_with_hint: {
    // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit, uintptr_t hint);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                llvm::PointerType::getUnqual(KmpCriticalNameTy),
                                CGM.IntPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
    break;
  }
  case OMPRTL__kmpc_threadprivate_register: {
    // Build void __kmpc_threadprivate_register(ident_t *, void *data,
    // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
    // The three callback parameter types are built inline below.
    // typedef void *(*kmpc_ctor)(void *);
    auto KmpcCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                /*isVarArg*/ false)->getPointerTo();
    // typedef void *(*kmpc_cctor)(void *, void *);
    llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto KmpcCopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
                                /*isVarArg*/ false)->getPointerTo();
    // typedef void (*kmpc_dtor)(void *);
    auto KmpcDtorTy =
        llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
            ->getPointerTo();
    llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
                              KmpcCopyCtorTy, KmpcDtorTy};
    auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
                                        /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
    break;
  }
  case OMPRTL__kmpc_end_critical: {
    // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
    break;
  }
  case OMPRTL__kmpc_cancel_barrier: {
    // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
    // global_tid); the result tells whether the region was cancelled.
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
    break;
  }
  case OMPRTL__kmpc_barrier: {
    // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
    break;
  }
  case OMPRTL__kmpc_for_static_fini: {
    // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
    break;
  }
  case OMPRTL__kmpc_push_num_threads: {
    // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_threads)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
    break;
  }
  case OMPRTL__kmpc_serialized_parallel: {
    // Build void __kmpc_serialized_parallel(ident_t *loc,
    // kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
    break;
  }
  case OMPRTL__kmpc_end_serialized_parallel: {
    // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
    break;
  }
  case OMPRTL__kmpc_flush: {
    // Build void __kmpc_flush(ident_t *loc);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
    break;
  }
  case OMPRTL__kmpc_master: {
    // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
    // Returns nonzero for the thread that should execute the master region.
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
    break;
  }
  case OMPRTL__kmpc_end_master: {
    // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
    break;
  }
  case OMPRTL__kmpc_omp_taskyield: {
    // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
    // int end_part);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
                                CGM.Int32Ty, CGM.IntTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
    break;
  }
  case OMPRTL__kmpc_single: {
    // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
    // Returns nonzero for the single thread that executes the region.
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
    break;
  }
  case OMPRTL__kmpc_end_single: {
    // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
    break;
  }
  case OMPRTL__kmpc_omp_task_alloc: {
    // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
    // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
    // kmp_routine_entry_t *task_entry);
    assert(KmpRoutineEntryPtrTy != nullptr &&
           "Type kmp_routine_entry_t must be created.");
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
                                CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
    // Return void * and then cast to particular kmp_task_t type.
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
    break;
  }
  case OMPRTL__kmpc_omp_task: {
    // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
    // *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
    break;
  }
  case OMPRTL__kmpc_copyprivate: {
    // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
    // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
    // kmp_int32 didit);
    llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CpyFnTy =
        llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
                                CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
                                CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
    break;
  }
  case OMPRTL__kmpc_reduce: {
    // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
    // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
                                               /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
    break;
  }
  case OMPRTL__kmpc_reduce_nowait: {
    // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
    // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
    // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
    // *lck); same signature as __kmpc_reduce, without the implied barrier.
    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
                                               /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
    break;
  }
  case OMPRTL__kmpc_end_reduce: {
    // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
    break;
  }
  case OMPRTL__kmpc_end_reduce_nowait: {
    // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy,
                                  /*Name=*/"__kmpc_end_reduce_nowait");
    break;
  }
  case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
    break;
  }
  case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy,
                                      /*Name=*/"__kmpc_omp_task_complete_if0");
    break;
  }
  case OMPRTL__kmpc_ordered: {
    // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
    break;
  }
  case OMPRTL__kmpc_end_ordered: {
    // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
    break;
  }
  case OMPRTL__kmpc_omp_taskwait: {
    // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
    break;
  }
  case OMPRTL__kmpc_taskgroup: {
    // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
    break;
  }
  case OMPRTL__kmpc_end_taskgroup: {
    // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
    break;
  }
  case OMPRTL__kmpc_push_proc_bind: {
    // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
    // int proc_bind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
    break;
  }
  case OMPRTL__kmpc_omp_task_with_deps: {
    // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
    // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
    // Dependence lists are passed as void* here and cast by the runtime.
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
        CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
    break;
  }
  case OMPRTL__kmpc_omp_wait_deps: {
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    //
kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 1305 // kmp_depend_info_t *noalias_dep_list); 1306 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1307 CGM.Int32Ty, CGM.VoidPtrTy, 1308 CGM.Int32Ty, CGM.VoidPtrTy}; 1309 llvm::FunctionType *FnTy = 1310 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1311 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 1312 break; 1313 } 1314 case OMPRTL__kmpc_cancellationpoint: { 1315 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 1316 // global_tid, kmp_int32 cncl_kind) 1317 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1318 llvm::FunctionType *FnTy = 1319 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1320 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 1321 break; 1322 } 1323 case OMPRTL__kmpc_cancel: { 1324 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 1325 // kmp_int32 cncl_kind) 1326 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1327 llvm::FunctionType *FnTy = 1328 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1329 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 1330 break; 1331 } 1332 case OMPRTL__kmpc_push_num_teams: { 1333 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 1334 // kmp_int32 num_teams, kmp_int32 num_threads) 1335 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1336 CGM.Int32Ty}; 1337 llvm::FunctionType *FnTy = 1338 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1339 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 1340 break; 1341 } 1342 case OMPRTL__kmpc_fork_teams: { 1343 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 1344 // microtask, ...); 1345 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1346 
                                getKmpc_MicroPointerTy()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
    break;
  }
  case OMPRTL__tgt_target: {
    // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
    // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int32Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int32Ty->getPointerTo()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
    break;
  }
  case OMPRTL__tgt_target_teams: {
    // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
    // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
    // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
    llvm::Type *TypeParams[] = {CGM.Int32Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int32Ty->getPointerTo(),
                                CGM.Int32Ty,
                                CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
    break;
  }
  case OMPRTL__tgt_register_lib: {
    // Build void __tgt_register_lib(__tgt_bin_desc *desc);
    // NOTE(review): the comment says 'void' but the declaration below returns
    // Int32Ty -- confirm against the libomptarget interface before changing.
    QualType ParamTy =
        CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
    llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
    break;
  }
  case OMPRTL__tgt_unregister_lib: {
    // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
    // NOTE(review): comment says 'void' but the declaration below returns
    // Int32Ty -- confirm against the libomptarget interface before changing.
    QualType ParamTy =
        CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
    llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
    break;
  }
  }
  assert(RTLFn && "Unable to find OpenMP runtime function");
  return RTLFn;
}

/// Declares the __kmpc_for_static_init_{4,4u,8,8u} variant matching the
/// induction-variable size and signedness.
llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
                                                             bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                       : "__kmpc_for_static_init_4u")
                           : (IVSigned ? "__kmpc_for_static_init_8"
                                       : "__kmpc_for_static_init_8u");
  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    CGM.Int32Ty,                               // schedtype
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy,                                     // p_stride
    ITy,                                       // incr
    ITy                                        // chunk
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Declares the __kmpc_dispatch_init_{4,4u,8,8u} variant matching the
/// induction-variable size and signedness.
llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  auto ITy = IVSize == 32 ?
      CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Declares the __kmpc_dispatch_fini_{4,4u,8,8u} variant matching the
/// induction-variable size and signedness.
llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Declares the __kmpc_dispatch_next_{4,4u,8,8u} variant matching the
/// induction-variable size and signedness.
llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  auto ITy = IVSize == 32 ?
CGM.Int32Ty : CGM.Int64Ty; 1485 auto PtrTy = llvm::PointerType::getUnqual(ITy); 1486 llvm::Type *TypeParams[] = { 1487 getIdentTyPointerTy(), // loc 1488 CGM.Int32Ty, // tid 1489 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1490 PtrTy, // p_lower 1491 PtrTy, // p_upper 1492 PtrTy // p_stride 1493 }; 1494 llvm::FunctionType *FnTy = 1495 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1496 return CGM.CreateRuntimeFunction(FnTy, Name); 1497 } 1498 1499 llvm::Constant * 1500 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1501 assert(!CGM.getLangOpts().OpenMPUseTLS || 1502 !CGM.getContext().getTargetInfo().isTLSSupported()); 1503 // Lookup the entry, lazily creating it if necessary. 1504 return getOrCreateInternalVariable(CGM.Int8PtrPtrTy, 1505 Twine(CGM.getMangledName(VD)) + ".cache."); 1506 } 1507 1508 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1509 const VarDecl *VD, 1510 Address VDAddr, 1511 SourceLocation Loc) { 1512 if (CGM.getLangOpts().OpenMPUseTLS && 1513 CGM.getContext().getTargetInfo().isTLSSupported()) 1514 return VDAddr; 1515 1516 auto VarTy = VDAddr.getElementType(); 1517 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1518 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1519 CGM.Int8PtrTy), 1520 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1521 getOrCreateThreadPrivateCache(VD)}; 1522 return Address(CGF.EmitRuntimeCall( 1523 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 1524 VDAddr.getAlignment()); 1525 } 1526 1527 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1528 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1529 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1530 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1531 // library. 
  auto OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {OMPLoc,
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.VoidPtrTy),
                         Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}

/// Emits, at most once per variable, the ctor/dtor helper functions for a
/// threadprivate definition and registers them with the runtime. Returns the
/// synthesized initialization function when one had to be created (i.e. when
/// no CodeGenFunction was supplied), otherwise nullptr.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Native TLS needs no runtime registration.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
    ThreadPrivateWithDefinition.insert(VD);
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    auto Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_ctor_.", FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, SourceLocation());
      auto ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst),
/*Volatile=*/false, 1577 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1578 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1579 Arg = CtorCGF.Builder.CreateElementBitCast(Arg, 1580 CtorCGF.ConvertTypeForMem(ASTTy)); 1581 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1582 /*IsInitializer=*/true); 1583 ArgVal = CtorCGF.EmitLoadOfScalar( 1584 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1585 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1586 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1587 CtorCGF.FinishFunction(); 1588 Ctor = Fn; 1589 } 1590 if (VD->getType().isDestructedType() != QualType::DK_none) { 1591 // Generate function that emits destructor call for the threadprivate copy 1592 // of the variable VD 1593 CodeGenFunction DtorCGF(CGM); 1594 FunctionArgList Args; 1595 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 1596 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 1597 Args.push_back(&Dst); 1598 1599 auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1600 CGM.getContext().VoidTy, Args); 1601 auto FTy = CGM.getTypes().GetFunctionType(FI); 1602 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 1603 FTy, ".__kmpc_global_dtor_.", FI, Loc); 1604 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1605 SourceLocation()); 1606 auto ArgVal = DtorCGF.EmitLoadOfScalar( 1607 DtorCGF.GetAddrOfLocalVar(&Dst), 1608 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1609 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 1610 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1611 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1612 DtorCGF.FinishFunction(); 1613 Dtor = Fn; 1614 } 1615 // Do not emit init function if it is not required. 
1616 if (!Ctor && !Dtor) 1617 return nullptr; 1618 1619 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1620 auto CopyCtorTy = 1621 llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1622 /*isVarArg=*/false)->getPointerTo(); 1623 // Copying constructor for the threadprivate variable. 1624 // Must be NULL - reserved by runtime, but currently it requires that this 1625 // parameter is always NULL. Otherwise it fires assertion. 1626 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1627 if (Ctor == nullptr) { 1628 auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1629 /*isVarArg=*/false)->getPointerTo(); 1630 Ctor = llvm::Constant::getNullValue(CtorTy); 1631 } 1632 if (Dtor == nullptr) { 1633 auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1634 /*isVarArg=*/false)->getPointerTo(); 1635 Dtor = llvm::Constant::getNullValue(DtorTy); 1636 } 1637 if (!CGF) { 1638 auto InitFunctionTy = 1639 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1640 auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( 1641 InitFunctionTy, ".__omp_threadprivate_init_.", 1642 CGM.getTypes().arrangeNullaryFunction()); 1643 CodeGenFunction InitCGF(CGM); 1644 FunctionArgList ArgList; 1645 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1646 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1647 Loc); 1648 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1649 InitCGF.FinishFunction(); 1650 return InitFunction; 1651 } 1652 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1653 } 1654 return nullptr; 1655 } 1656 1657 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen 1658 /// function. 
/// Here is the logic:
/// if (Cond) {
///   ThenGen();
/// } else {
///   ElseGen();
/// }
static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
                            const RegionCodeGenTy &ThenGen,
                            const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  auto ThenBlock = CGF.createBasicBlock("omp_if.then");
  auto ElseBlock = CGF.createBasicBlock("omp_if.else");
  auto ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// \brief Emit code for a 'parallel' region: either a __kmpc_fork_call of the
/// outlined function, or — when an 'if' clause evaluates false — a serialized
/// execution bracketed by __kmpc_serialized_parallel /
/// __kmpc_end_serialized_parallel.
/// \param OutlinedFn The outlined parallel-region function
///        (gtid*, bound_tid*, captured vars...).
/// \param CapturedVars Values captured by the region, appended verbatim to the
///        runtime/outlined call.
/// \param IfCond Condition of the 'if' clause, or nullptr if absent.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Value *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  auto *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    auto &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    auto &RT = CGF.CGM.getOpenMPRuntime();
    auto ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero, CapturedStruct);
    auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    // The serialized path calls the outlined function directly, so the
    // bound-thread-id argument is a pointer to a literal zero.
    Address ZeroAddr =
        CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
                             /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(),
                          CapturedVars.end());
    CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond)
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  else {
    // No 'if' clause: unconditionally take the fork path.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();

  auto ThreadID = getThreadID(CGF, Loc);
  auto Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

/// \brief Get (creating on first request) a module-internal global of type
/// \a Ty uniquely identified by \a Name, memoized in InternalVars.
/// Asserts that a repeated request uses the same type.
llvm::Constant *
CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
                                             const llvm::Twine &Name) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  auto RuntimeName = Out.str();
  auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
  if (Elem.second) {
    // Cache hit: the variable already exists; it must match the requested
    // type.
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  // Cache miss: create a zero-initialized common-linkage global so identical
  // definitions in other TUs merge.
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first());
}

/// \brief Get the lock global (".gomp_critical_user_<name>.var") used by the
/// runtime for the critical region identified by \a CriticalName.
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  llvm::Twine Name(".gomp_critical_user_", CriticalName);
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
/// Emits an "enter" runtime call before the region and an "exit" runtime call
/// after it; when \a Conditional is set, the region body is guarded by the
/// enter call's non-zero result (the caller must invoke Done() to close the
/// guard).
class CommonActionTy final : public PrePostActionTy {
  llvm::Value *EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::Value *ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
                 llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
                 bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // Close the conditional region opened in Enter(); only meaningful when
  // Conditional is true (ContBlock is set).
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee,
                        ExitArgs);
  }
};
} // anonymous namespace

/// \brief Emit a 'critical' region bracketed by __kmpc_critical (or
/// __kmpc_critical_with_hint when \a Hint is present) and __kmpc_end_critical.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    // The hint only goes to the enter call; __kmpc_end_critical keeps the
    // three-argument form.
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
  }
  CommonActionTy Action(
      createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
                                 : OMPRTL__kmpc_critical),
      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// \brief Emit a 'master' region: the body runs only when __kmpc_master
/// returns non-zero, followed by __kmpc_end_master.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the conditional guard opened by the action's Enter().
  Action.Done(CGF);
}

/// \brief Emit a call implementing the 'taskyield' directive.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  Address PtrAddr =
      CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  // Re-type the loaded void* as a pointer to Var's memory type, carrying
  // Var's declared alignment.
  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// \brief Build the internal helper ".omp.copyprivate.copy_func" that copies
/// each copyprivate variable from the single-executing thread's array (RHS) to
/// another thread's array (LHS), using the provided assignment expressions.
/// \param ArgsType Pointer type of the void*[n] copyprivate list both
///        arguments are cast to.
/// \return The emitted copy function (void(void *LHS, void *RHS)).
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
  auto &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *Fn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
      ".omp.copyprivate.copy_func", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// \brief Emit a 'single' region guarded by __kmpc_single /
/// __kmpc_end_single, plus the copyprivate broadcast via __kmpc_copyprivate
/// when copyprivate variables are present.
/// The four copyprivate arrays are parallel (asserted below).
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  auto &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Set inside the guarded region, so only the thread that executed the
    // single body reports did_it == 1 to __kmpc_copyprivate.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional guard opened by the action's Enter().
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    auto CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(
          CopyprivateList, I, CGF.getPointerSize());
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all
    // other threads in the corresponding parallel region.
    auto *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
    auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

/// \brief Emit an 'ordered' region. With the 'threads' form it is bracketed by
/// __kmpc_ordered / __kmpc_end_ordered; otherwise the body is emitted inline
/// with no runtime calls.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
                          createRuntimeFunction(OMPRTL__kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// \brief Emit an implicit or explicit barrier. In a cancellable region (and
/// unless \a ForceSimpleCall) __kmpc_cancel_barrier is used and, when
/// \a EmitChecks, its result branches to the construct's cancel exit;
/// otherwise a plain __kmpc_barrier is emitted. \a Kind selects the
/// OMP_IDENT_BARRIER_* flag encoded in the location.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags =
  OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      auto *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
        auto *ContBB = CGF.createBasicBlock(".cancel.continue");
        auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        auto CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}

/// \brief Map the OpenMP loop schedule to the runtime enumeration.
/// \a Ordered selects the OMP_ord_* variants; an unknown schedule kind
/// defaults to static (and must not carry a chunk).
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// \brief Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

/// \brief True iff \a ScheduleKind/\a Chunked lowers to the plain (non-chunked,
/// non-ordered) static schedule.
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

/// \brief True iff the dist_schedule lowers to the non-chunked static
/// distribute schedule.
bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}


/// \brief True iff the schedule kind requires the dynamic dispatch protocol
/// (anything that does not lower to plain static).
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  auto Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// \brief Emit __kmpc_dispatch_init_* for a dynamically scheduled loop.
/// \param IVSize Iteration-variable width in bits (selects the 4/8[u] runtime
///        entry); \param IVSigned its signedness.
/// \param Chunk Chunk-size value, or nullptr to use the default of 1.
void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPScheduleClauseKind ScheduleKind,
                                          unsigned IVSize, bool IVSigned,
                                          bool Ordered, llvm::Value *UB,
                                          llvm::Value *Chunk) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
  // Non-ordered static schedules must go through the static-init path, not
  // dispatch-init.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  if (Chunk == nullptr)
    Chunk = CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
    emitUpdateLocation(CGF, Loc),
    getThreadID(CGF, Loc),
    CGF.Builder.getInt32(Schedule), // Schedule type
    CGF.Builder.getIntN(IVSize, 0), // Lower
    UB,                             // Upper
    CGF.Builder.getIntN(IVSize, 1), // Stride
    Chunk                           // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// \brief Shared helper for emitForStaticInit/emitDistributeStaticInit: builds
/// the __kmpc_for_static_init_* call. Only static schedules are legal here,
/// and \a Ordered must be false (both asserted).
/// IL/LB/UB/ST are in/out pointers the runtime fills with this thread's
/// last-iteration flag, bounds, and stride.
static void emitForStaticInitCall(CodeGenFunction &CGF,
                                  SourceLocation Loc,
                                  llvm::Value * UpdateLocation,
                                  llvm::Value * ThreadId,
                                  llvm::Constant * ForStaticInitFunction,
                                  OpenMPSchedType Schedule,
                                  unsigned IVSize, bool IVSigned, bool Ordered,
                                  Address IL, Address LB, Address UB,
                                  Address ST, llvm::Value *Chunk) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
    UpdateLocation,
    ThreadId,
    CGF.Builder.getInt32(Schedule), // Schedule type
    IL.getPointer(),                // &isLastIter
    LB.getPointer(),                // &LB
    UB.getPointer(),                // &UB
    ST.getPointer(),                // &Stride
    CGF.Builder.getIntN(IVSize, 1), // Incr
    Chunk                           // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

/// \brief Emit the static-init call for a worksharing loop.
/// NOTE(review): Ordered is forwarded to emitForStaticInitCall, which asserts
/// !Ordered — callers are expected to route ordered loops through
/// emitForDispatchInit instead; confirm at call sites.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPScheduleClauseKind ScheduleKind,
                                        unsigned IVSize, bool IVSigned,
                                        bool Ordered, Address IL, Address LB,
                                        Address UB, Address ST,
                                        llvm::Value *Chunk) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(ScheduleKind, Chunk != nullptr,
                                                   Ordered);
  auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
  auto *ThreadId = getThreadID(CGF, Loc);
  auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
  emitForStaticInitCall(CGF, Loc, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk);
}

/// \brief Emit the static-init call for a 'distribute' loop (dist_schedule
/// mapping; shares the __kmpc_for_static_init_* entry points).
void CGOpenMPRuntime::emitDistributeStaticInit(CodeGenFunction &CGF,
                                               SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind,
                                               unsigned IVSize, bool IVSigned,
                                               bool Ordered, Address IL, Address LB,
                                               Address UB, Address ST,
                                               llvm::Value *Chunk) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr);
  auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
  auto *ThreadId = getThreadID(CGF, Loc);
  auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
  emitForStaticInitCall(CGF, Loc, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk);
}

/// \brief Emit the matching __kmpc_for_static_fini call closing a
/// statically-scheduled loop.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
                      Args);
}

/// \brief Signal the end of one iteration of an ordered dynamically-scheduled
/// loop via __kmpc_dispatch_fini_*.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

/// \brief Request the next chunk of a dynamically-scheduled loop.
/// \return An i1/bool value: true while the runtime handed out another chunk
///         (the i32 result of __kmpc_dispatch_next_* converted to bool).
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
      CGF.getContext().BoolTy, Loc);
}

void
CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                      llvm::Value *NumThreads,
                                      SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
                      Args);
}

// Lowers the 'proc_bind' clause to a __kmpc_push_proc_bind call, mapping the
// AST clause kind onto the numeric encoding expected by the runtime.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         OpenMPProcBindClauseKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Constants for proc bind value accepted by the runtime.
  enum ProcBindTy {
    ProcBindFalse = 0,
    ProcBindTrue,
    ProcBindMaster,
    ProcBindClose,
    ProcBindSpread,
    ProcBindIntel,
    ProcBindDefault
  } RuntimeProcBind;
  switch (ProcBind) {
  case OMPC_PROC_BIND_master:
    RuntimeProcBind = ProcBindMaster;
    break;
  case OMPC_PROC_BIND_close:
    RuntimeProcBind = ProcBindClose;
    break;
  case OMPC_PROC_BIND_spread:
    RuntimeProcBind = ProcBindSpread;
    break;
  case OMPC_PROC_BIND_unknown:
    llvm_unreachable("Unsupported proc_bind value.");
  }
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}

// The runtime's flush entry point takes no variable list, so the ArrayRef
// parameter is intentionally unnamed and ignored.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call void __kmpc_flush(ident_t *loc)
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
                      emitUpdateLocation(CGF, Loc));
}

namespace {
/// \brief Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// \brief List of shared variables.
  KmpTaskTShareds,
  /// \brief Task routine.
  KmpTaskTRoutine,
  /// \brief Partition id for the untied tasks.
  KmpTaskTPartId,
  /// \brief Function with call of destructors for private variables.
  KmpTaskTDestructors,
};
} // anonymous namespace

// Returns true when no offloading entries have been recorded.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  // FIXME: Add other entries type when they become supported.
  return OffloadEntriesTargetRegion.empty();
}

/// \brief Initialize target region entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // Reserve a slot keyed by (device, file, parent function, line); the
  // address and ID are filled in later by registerTargetRegionEntryInfo.
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr);
  ++OffloadingEntriesNum;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
2433 if (CGM.getLangOpts().OpenMPIsDevice) { 2434 assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 2435 "Entry must exist."); 2436 auto &Entry = 2437 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 2438 assert(Entry.isValid() && "Entry not initialized!"); 2439 Entry.setAddress(Addr); 2440 Entry.setID(ID); 2441 return; 2442 } else { 2443 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID); 2444 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 2445 } 2446 } 2447 2448 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 2449 unsigned DeviceID, unsigned FileID, StringRef ParentName, 2450 unsigned LineNum) const { 2451 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 2452 if (PerDevice == OffloadEntriesTargetRegion.end()) 2453 return false; 2454 auto PerFile = PerDevice->second.find(FileID); 2455 if (PerFile == PerDevice->second.end()) 2456 return false; 2457 auto PerParentName = PerFile->second.find(ParentName); 2458 if (PerParentName == PerFile->second.end()) 2459 return false; 2460 auto PerLine = PerParentName->second.find(LineNum); 2461 if (PerLine == PerParentName->second.end()) 2462 return false; 2463 // Fail if this entry is already registered. 2464 if (PerLine->second.getAddress() || PerLine->second.getID()) 2465 return false; 2466 return true; 2467 } 2468 2469 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 2470 const OffloadTargetRegionEntryInfoActTy &Action) { 2471 // Scan all target region entries and perform the provided action. 2472 for (auto &D : OffloadEntriesTargetRegion) 2473 for (auto &F : D.second) 2474 for (auto &P : F.second) 2475 for (auto &L : P.second) 2476 Action(D.first, F.first, P.first(), L.first, L.second); 2477 } 2478 2479 /// \brief Create a Ctor/Dtor-like function whose body is emitted through 2480 /// \a Codegen. 
This is used to emit the two functions that register and 2481 /// unregister the descriptor of the current compilation unit. 2482 static llvm::Function * 2483 createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name, 2484 const RegionCodeGenTy &Codegen) { 2485 auto &C = CGM.getContext(); 2486 FunctionArgList Args; 2487 ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(), 2488 /*Id=*/nullptr, C.VoidPtrTy); 2489 Args.push_back(&DummyPtr); 2490 2491 CodeGenFunction CGF(CGM); 2492 GlobalDecl(); 2493 auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2494 auto FTy = CGM.getTypes().GetFunctionType(FI); 2495 auto *Fn = 2496 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation()); 2497 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation()); 2498 Codegen(CGF); 2499 CGF.FinishFunction(); 2500 return Fn; 2501 } 2502 2503 llvm::Function * 2504 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { 2505 2506 // If we don't have entries or if we are emitting code for the device, we 2507 // don't need to do anything. 2508 if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) 2509 return nullptr; 2510 2511 auto &M = CGM.getModule(); 2512 auto &C = CGM.getContext(); 2513 2514 // Get list of devices we care about 2515 auto &Devices = CGM.getLangOpts().OMPTargetTriples; 2516 2517 // We should be creating an offloading descriptor only if there are devices 2518 // specified. 2519 assert(!Devices.empty() && "No OpenMP offloading devices??"); 2520 2521 // Create the external variables that will point to the begin and end of the 2522 // host entries section. These will be defined by the linker. 
2523 auto *OffloadEntryTy = 2524 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); 2525 llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable( 2526 M, OffloadEntryTy, /*isConstant=*/true, 2527 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 2528 ".omp_offloading.entries_begin"); 2529 llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable( 2530 M, OffloadEntryTy, /*isConstant=*/true, 2531 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 2532 ".omp_offloading.entries_end"); 2533 2534 // Create all device images 2535 llvm::SmallVector<llvm::Constant *, 4> DeviceImagesEntires; 2536 auto *DeviceImageTy = cast<llvm::StructType>( 2537 CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); 2538 2539 for (unsigned i = 0; i < Devices.size(); ++i) { 2540 StringRef T = Devices[i].getTriple(); 2541 auto *ImgBegin = new llvm::GlobalVariable( 2542 M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, 2543 /*Initializer=*/nullptr, 2544 Twine(".omp_offloading.img_start.") + Twine(T)); 2545 auto *ImgEnd = new llvm::GlobalVariable( 2546 M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, 2547 /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T)); 2548 2549 llvm::Constant *Dev = 2550 llvm::ConstantStruct::get(DeviceImageTy, ImgBegin, ImgEnd, 2551 HostEntriesBegin, HostEntriesEnd, nullptr); 2552 DeviceImagesEntires.push_back(Dev); 2553 } 2554 2555 // Create device images global array. 
  llvm::ArrayType *DeviceImagesInitTy =
      llvm::ArrayType::get(DeviceImageTy, DeviceImagesEntires.size());
  llvm::Constant *DeviceImagesInit =
      llvm::ConstantArray::get(DeviceImagesInitTy, DeviceImagesEntires);

  llvm::GlobalVariable *DeviceImages = new llvm::GlobalVariable(
      M, DeviceImagesInitTy, /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, DeviceImagesInit,
      ".omp_offloading.device_images");
  DeviceImages->setUnnamedAddr(true);

  // This is a Zero array to be used in the creation of the constant
  // expressions: the two zero GEP indices below address the first element of
  // the device images array.
  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
                             llvm::Constant::getNullValue(CGM.Int32Ty)};

  // Create the target region descriptor: number of devices, pointer to the
  // device images array, and the host entries begin/end markers.
  auto *BinaryDescriptorTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
  llvm::Constant *TargetRegionsDescriptorInit = llvm::ConstantStruct::get(
      BinaryDescriptorTy, llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
      llvm::ConstantExpr::getGetElementPtr(DeviceImagesInitTy, DeviceImages,
                                           Index),
      HostEntriesBegin, HostEntriesEnd, nullptr);

  auto *Desc = new llvm::GlobalVariable(
      M, BinaryDescriptorTy, /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, TargetRegionsDescriptorInit,
      ".omp_offloading.descriptor");

  // Emit code to register or unregister the descriptor at execution
  // startup or closing, respectively.

  // Create a variable to drive the registration and unregistration of the
  // descriptor, so we can reuse the logic that emits Ctors and Dtors.
  auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
  ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
                                IdentInfo, C.CharTy);

  // Unregister function: calls __tgt_unregister_lib on the descriptor.
  auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
      CGM, ".omp_offloading.descriptor_unreg",
      [&](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
                             Desc);
      });
  // Register function: calls __tgt_register_lib and schedules the unregister
  // function to run as a global destructor.
  auto *RegFn = createOffloadingBinaryDescriptorFunction(
      CGM, ".omp_offloading.descriptor_reg",
      [&](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
                             Desc);
        CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
      });
  return RegFn;
}

// Emits one __tgt_offload_entry record (address, name, size) for the given
// entry into the module.
void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID,
                                         llvm::Constant *Addr, uint64_t Size) {
  StringRef Name = Addr->getName();
  auto *TgtOffloadEntryType = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
  llvm::LLVMContext &C = CGM.getModule().getContext();
  llvm::Module &M = CGM.getModule();

  // Make sure the address has the right type.
  llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  llvm::GlobalVariable *Str =
      new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
                               llvm::GlobalValue::InternalLinkage, StrPtrInit,
                               ".omp_offloading.entry_name");
  Str->setUnnamedAddr(true);
  llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);

  // Create the entry struct.
  llvm::Constant *EntryInit = llvm::ConstantStruct::get(
      TgtOffloadEntryType, AddrPtr, StrPtr,
      llvm::ConstantInt::get(CGM.SizeTy, Size), nullptr);
  llvm::GlobalVariable *Entry = new llvm::GlobalVariable(
      M, TgtOffloadEntryType, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, EntryInit, ".omp_offloading.entry");

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection(".omp_offloading.entries");
  // We can't have any padding between symbols, so we need to have 1-byte
  // alignment.
  Entry->setAlignment(1);
}

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we do not have entries, we don't need to do anything.
  if (OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Sized up front; each entry is stored at the index given by its creation
  // order.
  SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
      OrderedEntries(OffloadEntriesInfoManager.size());

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Auxiliary methods to create metadata values and strings.
  // Wrap an unsigned value as i32 constant metadata.
  auto getMDInt = [&](unsigned v) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
  };

  auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter = [&](
      unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
      OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
    llvm::SmallVector<llvm::Metadata *, 32> Ops;
    // Generate metadata for target regions. Each entry of this metadata
    // contains:
    // - Entry 0 -> Kind of this type of metadata (0).
    // - Entry 1 -> Device ID of the file where the entry was identified.
    // - Entry 2 -> File ID of the file where the entry was identified.
    // - Entry 3 -> Mangled name of the function where the entry was identified.
    // - Entry 4 -> Line in the file where the entry was identified.
    // - Entry 5 -> Order the entry was created.
    // The first element of the metadata node is the kind.
    Ops.push_back(getMDInt(E.getKind()));
    Ops.push_back(getMDInt(DeviceID));
    Ops.push_back(getMDInt(FileID));
    Ops.push_back(getMDString(ParentName));
    Ops.push_back(getMDInt(Line));
    Ops.push_back(getMDInt(E.getOrder()));

    // Save this entry in the right position of the ordered entries array.
    OrderedEntries[E.getOrder()] = &E;

    // Add metadata to the named metadata node.
    MD->addOperand(llvm::MDNode::get(C, Ops));
  };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Emit the host-side offload entries in creation order.
  for (auto *E : OrderedEntries) {
    assert(E && "All ordered entries must exist!");
    if (auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                E)) {
      assert(CE->getID() && CE->getAddress() &&
             "Entry ID and Addr are invalid!");
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0);
    } else
      llvm_unreachable("Unsupported entry kind.");
  }
}

/// \brief Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  // Any failure to read or parse the host IR file is silently ignored: the
  // device compilation then simply proceeds without host entry info.
  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (Buf.getError())
    return;

  llvm::LLVMContext C;
  auto ME = llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C);

  if (ME.getError())
    return;

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (auto I : MD->operands()) {
    llvm::MDNode *MN = cast<llvm::MDNode>(I);

    // Read operand Idx of the current node as an integer.
    auto getMDInt = [&](unsigned Idx) {
      llvm::ConstantAsMetadata *V =
          cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    // Read operand Idx of the current node as a string.
    auto getMDString = [&](unsigned Idx) {
      llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind (see the emitter above).
    switch (getMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case
        OffloadEntriesInfoManagerTy::OffloadEntryInfo::
            OFFLOAD_ENTRY_INFO_TARGET_REGION:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
          /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
          /*Order=*/getMDInt(5));
      break;
    }
  }
}

// Builds (once) the type of the task entry routine,
// kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *), caching both the
// QualType and the converted LLVM type.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    auto &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

// Appends a public, unnamed, non-mutable field of the given type to the
// declaration context (an implicit record) and returns it.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {

  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void    *addr;       // Pointer to the offload entry info.
  //                        // (function or global)
  //   char    *name;       // Name of the function or global.
  //   size_t   size;       // Size of the entry info (0 if it is a function).
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    RD->completeDefinition();
    // Cached so subsequent calls reuse the same record type.
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
  // These are the types we need to build:
  // struct __tgt_device_image{
  //   void *ImageStart;    // Pointer to the target code start.
  //   void *ImageEnd;      // Pointer to the target code end.
  //   // We also add the host entries to the device image, as it may be useful
  //   // for the target runtime to have access to that information.
  //   __tgt_offload_entry *EntriesBegin;  // Begin of the table with all
  //                                       // the entries.
  //   __tgt_offload_entry *EntriesEnd;    // End of the table with all the
  //                                       // entries (non inclusive).
  // };
  if (TgtDeviceImageQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    auto *RD = C.buildImplicitRecord("__tgt_device_image");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtDeviceImageQTy = C.getRecordType(RD);
  }
  return TgtDeviceImageQTy;
}

QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
  // struct __tgt_bin_desc{
  //   int32_t             NumDevices;     // Number of devices supported.
  //   __tgt_device_image  *DeviceImages;  // Arrays of device images
  //                                       // (one per device).
  //   __tgt_offload_entry *EntriesBegin;  // Begin of the table with all the
  //                                       // entries.
  //   __tgt_offload_entry *EntriesEnd;    // End of the table with all the
  //                                       // entries (non inclusive).
  // };
  if (TgtBinaryDescriptorQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
    RD->startDefinition();
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtBinaryDescriptorQTy = C.getRecordType(RD);
  }
  return TgtBinaryDescriptorQTy;
}

namespace {
/// Ties together an original captured variable, its task-private copy, and
/// (for firstprivate variables) the variable used to emit the initializer.
struct PrivateHelpersTy {
  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
                   const VarDecl *PrivateElemInit)
      : Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  const VarDecl *Original;
  const VarDecl *PrivateCopy;
  const VarDecl *PrivateElemInit;
};
// Pair of (alignment of the original variable, private var descriptors).
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

// Builds the implicit record holding all task-private copies, or returns
// nullptr when there are none.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    auto &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    auto *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (auto &&Pair : Privates) {
      auto *VD = Pair.second.Original;
      auto Type = VD->getType();
      // The record stores the value itself, so strip any reference.
      Type = Type.getNonReferenceType();
      auto *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        // Propagate alignment attributes from the original variable so the
        // private copy honors the requested alignment.
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

// Builds the implicit record for kmp_task_t; the field order must match the
// KmpTaskTFields enum above.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  auto &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_routine_entry_t destructors;
  //       };
  auto *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  RD->completeDefinition();
  return RD;
}

// Wraps kmp_task_t together with the (optional) privates record.
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  auto &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t       task_data;
  //         .kmp_privates_t. privates;
  //       };
  auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  // The privates field exists only when the task has private/firstprivate
  // variables.
  if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  }
  RD->completeDefinition();
  return RD;
}

/// \brief Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
///   tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Value *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Value *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
                                /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict());
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  auto *TaskEntry =
      llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_entry.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt->task_data.shareds);
  auto *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Field 0 of the wrapper record is the kmp_task_t task_data.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field (index 1) may be absent when the task has no private
  // variables; pass null in that case.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
                             TaskPrivatesMap, SharedsParam};
  CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
  // The task entry always returns 0.
  CGF.EmitStoreThroughLValue(
      RValue::get(CGF.Builder.getInt32(/*C=*/0)),
      CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

static llvm::Value
*emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc,
                         QualType KmpInt32Ty,
                         QualType KmpTaskTWithPrivatesPtrQTy,
                         QualType KmpTaskTWithPrivatesQTy) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
                                /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict());
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  // NOTE: an unused local 'FunctionType::ExtInfo Info;' was removed here; it
  // was never referenced.
  auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_destructor.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
                                    DestructorFnInfo);
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args);

  // Walk the fields of the privates record (field 1 of the wrapper) and push
  // a destructor cleanup for every field whose type requires non-trivial
  // destruction.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (auto DtorKind = Field->getType().isDestructedType()) {
      auto FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// \brief Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               ArrayRef<const Expr *> PrivateVars,
                               ArrayRef<const Expr *> FirstprivateVars,
                               QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  // First parameter: pointer to the privates record.
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict());
  Args.push_back(&TaskPrivatesArg);
  // Map each captured variable to the position of its matching out-parameter;
  // positions start at 1 because position 0 is the privates record itself.
  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (auto *E: PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc,
        /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
                            .withConst()
                            .withRestrict()));
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (auto *E : FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc,
        /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
                            .withConst()
                            .withRestrict()));
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
      ".omp_task_privates_map.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  // Tiny forwarding function; always inline it into the task entry.
  TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (auto *Field : PrivatesQTyRD->fields()) {
    auto FieldLVal = CGF.EmitLValueForField(Base, Field);
    // Locate the out-parameter matching this field's original variable and
    // store the field's address through it.
    auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    auto RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

// Comparator for llvm::array_pod_sort: orders private data by decreasing
// alignment.
static int array_pod_sort_comparator(const PrivateDataTy *P1,
                                     const PrivateDataTy *P2) {
  return P1->first < P2->first ? 1 : (P2->first < P1->first ?
-1 : 0); 3152 } 3153 3154 void CGOpenMPRuntime::emitTaskCall( 3155 CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, 3156 bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final, 3157 llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, 3158 const Expr *IfCond, ArrayRef<const Expr *> PrivateVars, 3159 ArrayRef<const Expr *> PrivateCopies, 3160 ArrayRef<const Expr *> FirstprivateVars, 3161 ArrayRef<const Expr *> FirstprivateCopies, 3162 ArrayRef<const Expr *> FirstprivateInits, 3163 ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) { 3164 if (!CGF.HaveInsertPoint()) 3165 return; 3166 auto &C = CGM.getContext(); 3167 llvm::SmallVector<PrivateDataTy, 8> Privates; 3168 // Aggregate privates and sort them by the alignment. 3169 auto I = PrivateCopies.begin(); 3170 for (auto *E : PrivateVars) { 3171 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3172 Privates.push_back(std::make_pair( 3173 C.getDeclAlign(VD), 3174 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 3175 /*PrivateElemInit=*/nullptr))); 3176 ++I; 3177 } 3178 I = FirstprivateCopies.begin(); 3179 auto IElemInitRef = FirstprivateInits.begin(); 3180 for (auto *E : FirstprivateVars) { 3181 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3182 Privates.push_back(std::make_pair( 3183 C.getDeclAlign(VD), 3184 PrivateHelpersTy( 3185 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 3186 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())))); 3187 ++I; 3188 ++IElemInitRef; 3189 } 3190 llvm::array_pod_sort(Privates.begin(), Privates.end(), 3191 array_pod_sort_comparator); 3192 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 3193 // Build type kmp_routine_entry_t (if not built yet). 3194 emitKmpRoutineEntryT(KmpInt32Ty); 3195 // Build type kmp_task_t (if not built yet). 
3196 if (KmpTaskTQTy.isNull()) { 3197 KmpTaskTQTy = C.getRecordType( 3198 createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy)); 3199 } 3200 auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3201 // Build particular struct kmp_task_t for the given task. 3202 auto *KmpTaskTWithPrivatesQTyRD = 3203 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 3204 auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 3205 QualType KmpTaskTWithPrivatesPtrQTy = 3206 C.getPointerType(KmpTaskTWithPrivatesQTy); 3207 auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 3208 auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo(); 3209 auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 3210 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 3211 3212 // Emit initial values for private copies (if any). 3213 llvm::Value *TaskPrivatesMap = nullptr; 3214 auto *TaskPrivatesMapTy = 3215 std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(), 3216 3) 3217 ->getType(); 3218 if (!Privates.empty()) { 3219 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3220 TaskPrivatesMap = emitTaskPrivateMappingFunction( 3221 CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates); 3222 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3223 TaskPrivatesMap, TaskPrivatesMapTy); 3224 } else { 3225 TaskPrivatesMap = llvm::ConstantPointerNull::get( 3226 cast<llvm::PointerType>(TaskPrivatesMapTy)); 3227 } 3228 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 3229 // kmp_task_t *tt); 3230 auto *TaskEntry = emitProxyTaskFunction( 3231 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy, 3232 KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap); 3233 3234 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 3235 // kmp_int32 flags, size_t sizeof_kmp_task_t, 
size_t sizeof_shareds, 3236 // kmp_routine_entry_t *task_entry); 3237 // Task flags. Format is taken from 3238 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h, 3239 // description of kmp_tasking_flags struct. 3240 const unsigned TiedFlag = 0x1; 3241 const unsigned FinalFlag = 0x2; 3242 unsigned Flags = Tied ? TiedFlag : 0; 3243 auto *TaskFlags = 3244 Final.getPointer() 3245 ? CGF.Builder.CreateSelect(Final.getPointer(), 3246 CGF.Builder.getInt32(FinalFlag), 3247 CGF.Builder.getInt32(/*C=*/0)) 3248 : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0); 3249 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 3250 auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 3251 llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), 3252 getThreadID(CGF, Loc), TaskFlags, 3253 KmpTaskTWithPrivatesTySize, SharedsSize, 3254 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3255 TaskEntry, KmpRoutineEntryPtrTy)}; 3256 auto *NewTask = CGF.EmitRuntimeCall( 3257 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 3258 auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3259 NewTask, KmpTaskTWithPrivatesPtrTy); 3260 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 3261 KmpTaskTWithPrivatesQTy); 3262 LValue TDBase = 3263 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3264 // Fill the data in the resulting kmp_task_t record. 3265 // Copy shareds if there are any. 3266 Address KmpTaskSharedsPtr = Address::invalid(); 3267 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 3268 KmpTaskSharedsPtr = 3269 Address(CGF.EmitLoadOfScalar( 3270 CGF.EmitLValueForField( 3271 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 3272 KmpTaskTShareds)), 3273 Loc), 3274 CGF.getNaturalTypeAlignment(SharedsTy)); 3275 CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy); 3276 } 3277 // Emit initial values for private copies (if any). 
3278 bool NeedsCleanup = false; 3279 if (!Privates.empty()) { 3280 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3281 auto PrivatesBase = CGF.EmitLValueForField(Base, *FI); 3282 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3283 LValue SharedsBase; 3284 if (!FirstprivateVars.empty()) { 3285 SharedsBase = CGF.MakeAddrLValue( 3286 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3287 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3288 SharedsTy); 3289 } 3290 CodeGenFunction::CGCapturedStmtInfo CapturesInfo( 3291 cast<CapturedStmt>(*D.getAssociatedStmt())); 3292 for (auto &&Pair : Privates) { 3293 auto *VD = Pair.second.PrivateCopy; 3294 auto *Init = VD->getAnyInitializer(); 3295 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3296 if (Init) { 3297 if (auto *Elem = Pair.second.PrivateElemInit) { 3298 auto *OriginalVD = Pair.second.Original; 3299 auto *SharedField = CapturesInfo.lookup(OriginalVD); 3300 auto SharedRefLValue = 3301 CGF.EmitLValueForField(SharedsBase, SharedField); 3302 SharedRefLValue = CGF.MakeAddrLValue( 3303 Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), 3304 SharedRefLValue.getType(), AlignmentSource::Decl); 3305 QualType Type = OriginalVD->getType(); 3306 if (Type->isArrayType()) { 3307 // Initialize firstprivate array. 3308 if (!isa<CXXConstructExpr>(Init) || 3309 CGF.isTrivialInitializer(Init)) { 3310 // Perform simple memcpy. 3311 CGF.EmitAggregateAssign(PrivateLValue.getAddress(), 3312 SharedRefLValue.getAddress(), Type); 3313 } else { 3314 // Initialize firstprivate array using element-by-element 3315 // intialization. 3316 CGF.EmitOMPAggregateAssign( 3317 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), 3318 Type, [&CGF, Elem, Init, &CapturesInfo]( 3319 Address DestElement, Address SrcElement) { 3320 // Clean up any temporaries needed by the initialization. 
3321 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3322 InitScope.addPrivate(Elem, [SrcElement]() -> Address { 3323 return SrcElement; 3324 }); 3325 (void)InitScope.Privatize(); 3326 // Emit initialization for single element. 3327 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3328 CGF, &CapturesInfo); 3329 CGF.EmitAnyExprToMem(Init, DestElement, 3330 Init->getType().getQualifiers(), 3331 /*IsInitializer=*/false); 3332 }); 3333 } 3334 } else { 3335 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3336 InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { 3337 return SharedRefLValue.getAddress(); 3338 }); 3339 (void)InitScope.Privatize(); 3340 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3341 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3342 /*capturedByInit=*/false); 3343 } 3344 } else { 3345 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3346 } 3347 } 3348 NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType(); 3349 ++FI; 3350 } 3351 } 3352 // Provide pointer to function with destructors for privates. 3353 llvm::Value *DestructorFn = 3354 NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty, 3355 KmpTaskTWithPrivatesPtrQTy, 3356 KmpTaskTWithPrivatesQTy) 3357 : llvm::ConstantPointerNull::get( 3358 cast<llvm::PointerType>(KmpRoutineEntryPtrTy)); 3359 LValue Destructor = CGF.EmitLValueForField( 3360 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors)); 3361 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3362 DestructorFn, KmpRoutineEntryPtrTy), 3363 Destructor); 3364 3365 // Process list of dependences. 3366 Address DependenciesArray = Address::invalid(); 3367 unsigned NumDependencies = Dependences.size(); 3368 if (NumDependencies) { 3369 // Dependence kind for RTL. 
3370 enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 }; 3371 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 3372 RecordDecl *KmpDependInfoRD; 3373 QualType FlagsTy = 3374 C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 3375 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 3376 if (KmpDependInfoTy.isNull()) { 3377 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 3378 KmpDependInfoRD->startDefinition(); 3379 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 3380 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 3381 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 3382 KmpDependInfoRD->completeDefinition(); 3383 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 3384 } else { 3385 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 3386 } 3387 CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy); 3388 // Define type kmp_depend_info[<Dependences.size()>]; 3389 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 3390 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), 3391 ArrayType::Normal, /*IndexTypeQuals=*/0); 3392 // kmp_depend_info[<Dependences.size()>] deps; 3393 DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy); 3394 for (unsigned i = 0; i < NumDependencies; ++i) { 3395 const Expr *E = Dependences[i].second; 3396 auto Addr = CGF.EmitLValue(E); 3397 llvm::Value *Size; 3398 QualType Ty = E->getType(); 3399 if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 3400 LValue UpAddrLVal = 3401 CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false); 3402 llvm::Value *UpAddr = 3403 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); 3404 llvm::Value *LowIntPtr = 3405 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); 3406 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 3407 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 3408 } else 3409 Size = CGF.getTypeSize(Ty); 
3410 auto Base = CGF.MakeAddrLValue( 3411 CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize), 3412 KmpDependInfoTy); 3413 // deps[i].base_addr = &<Dependences[i].second>; 3414 auto BaseAddrLVal = CGF.EmitLValueForField( 3415 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 3416 CGF.EmitStoreOfScalar( 3417 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), 3418 BaseAddrLVal); 3419 // deps[i].len = sizeof(<Dependences[i].second>); 3420 auto LenLVal = CGF.EmitLValueForField( 3421 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 3422 CGF.EmitStoreOfScalar(Size, LenLVal); 3423 // deps[i].flags = <Dependences[i].first>; 3424 RTLDependenceKindTy DepKind; 3425 switch (Dependences[i].first) { 3426 case OMPC_DEPEND_in: 3427 DepKind = DepIn; 3428 break; 3429 // Out and InOut dependencies must use the same code. 3430 case OMPC_DEPEND_out: 3431 case OMPC_DEPEND_inout: 3432 DepKind = DepInOut; 3433 break; 3434 case OMPC_DEPEND_source: 3435 case OMPC_DEPEND_sink: 3436 case OMPC_DEPEND_unknown: 3437 llvm_unreachable("Unknown task dependence type"); 3438 } 3439 auto FlagsLVal = CGF.EmitLValueForField( 3440 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 3441 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 3442 FlagsLVal); 3443 } 3444 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3445 CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()), 3446 CGF.VoidPtrTy); 3447 } 3448 3449 // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() 3450 // libcall. 
3451 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 3452 // *new_task); 3453 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 3454 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 3455 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 3456 // list is not empty 3457 auto *ThreadID = getThreadID(CGF, Loc); 3458 auto *UpLoc = emitUpdateLocation(CGF, Loc); 3459 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 3460 llvm::Value *DepTaskArgs[7]; 3461 if (NumDependencies) { 3462 DepTaskArgs[0] = UpLoc; 3463 DepTaskArgs[1] = ThreadID; 3464 DepTaskArgs[2] = NewTask; 3465 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); 3466 DepTaskArgs[4] = DependenciesArray.getPointer(); 3467 DepTaskArgs[5] = CGF.Builder.getInt32(0); 3468 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3469 } 3470 auto &&ThenCodeGen = [NumDependencies, &TaskArgs, 3471 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 3472 // TODO: add check for untied tasks. 
3473 auto &RT = CGF.CGM.getOpenMPRuntime(); 3474 if (NumDependencies) { 3475 CGF.EmitRuntimeCall( 3476 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), 3477 DepTaskArgs); 3478 } else { 3479 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_task), 3480 TaskArgs); 3481 } 3482 }; 3483 3484 llvm::Value *DepWaitTaskArgs[6]; 3485 if (NumDependencies) { 3486 DepWaitTaskArgs[0] = UpLoc; 3487 DepWaitTaskArgs[1] = ThreadID; 3488 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); 3489 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 3490 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 3491 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3492 } 3493 auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 3494 NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF, 3495 PrePostActionTy &) { 3496 auto &RT = CGF.CGM.getOpenMPRuntime(); 3497 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 3498 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 3499 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 3500 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 3501 // is specified. 
3502 if (NumDependencies) 3503 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), 3504 DepWaitTaskArgs); 3505 // Call proxy_task_entry(gtid, new_task); 3506 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy]( 3507 CodeGenFunction &CGF, PrePostActionTy &Action) { 3508 Action.Enter(CGF); 3509 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 3510 CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); 3511 }; 3512 3513 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 3514 // kmp_task_t *new_task); 3515 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 3516 // kmp_task_t *new_task); 3517 RegionCodeGenTy RCG(CodeGen); 3518 CommonActionTy Action( 3519 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, 3520 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); 3521 RCG.setAction(Action); 3522 RCG(CGF); 3523 }; 3524 3525 if (IfCond) 3526 emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 3527 else { 3528 RegionCodeGenTy ThenRCG(ThenCodeGen); 3529 ThenRCG(CGF); 3530 } 3531 } 3532 3533 /// \brief Emit reduction operation for each element of array (required for 3534 /// array sections) LHS op = RHS. 3535 /// \param Type Type of array. 3536 /// \param LHSVar Variable on the left side of the reduction operation 3537 /// (references element of array in original variable). 3538 /// \param RHSVar Variable on the right side of the reduction operation 3539 /// (references element of array in original variable). 3540 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 3541 /// RHSVar. 
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  auto RHSBegin = RHSAddr.getPointer();
  auto LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Guard against a zero-length array: skip the body entirely if empty.
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  // EntryBB is remembered so the PHIs below can take the loop-entry values.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // Current-element pointers are PHI nodes: entry value from EntryBB, the
  // advanced value added at the bottom of the loop.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy: temporarily rebind LHSVar/RHSVar to the current elements so
  // RedOpGen's expressions operate element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Back-edge values use the CURRENT insert block, not BodyBB: RedOpGen may
  // have emitted additional blocks inside the loop body.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner.
/// If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // A user-defined-reduction combiner is recognized by its shape: a call whose
  // callee is an OpaqueValueExpr wrapping a DeclRefExpr to an
  // OMPDeclareReductionDecl. In that case bind the opaque callee to the real
  // combiner function before emitting the call.
  if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  // Plain (non-UDR) combiner expression: emit as-is for its side effects.
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emit the outlined reduce_func passed to __kmpc_reduce{_nowait}: it takes
/// two void*-array arguments (lhs and rhs reduction lists) and applies each
/// reduction operation element-wise, storing results into the lhs slots.
/// \param ArgsType LLVM pointer type of the void*[n] reduction list.
static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
                                          llvm::Type *ArgsType,
                                          ArrayRef<const Expr *> Privates,
                                          ArrayRef<const Expr *> LHSExprs,
                                          ArrayRef<const Expr *> RHSExprs,
                                          ArrayRef<const Expr *> ReductionOps) {
  auto &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *Fn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
      ".omp.reduction.reduction_func", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  // Bind each LHS/RHS variable to the corresponding slot of the incoming
  // reduction lists. Idx can run ahead of I because VLA entries occupy an
  // extra slot holding the array size.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&]() -> Address {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&]() -> Address {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem =
          CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy);
      auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      // The size was smuggled through the list as a pointer; convert it back
      // to an integer and bind it to the VLA size expression.
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Second pass: emit the actual combiner for every reduction operation.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (auto *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit one reduction combination: dispatches to the element-wise aggregate
/// helper for array-typed privates, otherwise emits the combiner directly.
static void emitSingleReductionCombiner(CodeGenFunction &CGF,
                                        const Expr *ReductionOp,
                                        const Expr *PrivateRef,
                                        const DeclRefExpr *LHS,
                                        const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    bool WithNowait, bool SimpleReduction) {
  if (!CGF.HaveInsertPoint())
    return;
  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  auto &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime involvement: just apply each combiner directly (e.g. for
    // serial or 'simd' reductions).
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (auto *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (auto *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem =
      CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size. VLA sizes travel in the list as pointers (inttoptr
      // here, matching the ptrtoint decode in emitReductionFunction).
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
                                             CGF.getPointerSize());
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .first,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  auto *ReductionFn = emitReductionFunction(
      CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  auto *Lock = getCriticalRegionLock(".reduction");

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  auto *ThreadId = getThreadID(CGF, Loc);
  auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  auto *RL =
    CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
                                                    CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  auto Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (auto *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // CommonActionTy with a null enter-function: only the end_reduce call is
  // emitted, after the combiners.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (auto *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Pattern-match 'x = <update>' to try the lock-free atomic path; other
      // shapes fall back to the critical-section path below.
      if (auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      auto *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD, IPriv,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) {
                // Fallback when no native atomic is available: materialize the
                // loaded value into a temporary bound to VD and re-evaluate
                // the update expression against it.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() -> Address {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          auto &RT = CGF.CGM.getOpenMPRuntime();
          RT.emitCriticalRegion(
              CGF, ".atomic_reduction",
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else
          CritRedGen(CGF, nullptr, nullptr, nullptr);
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else
    AtomicRCG(CGF);

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Emit a __kmpc_omp_taskwait call: block the current task until all of its
/// child tasks have completed.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Ignore return result until untied tasks are supported.
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
}

// Emits code for a directive that does not require outlining ('for',
// 'sections', 'atomic', ...): run CodeGen inline under an OpenMP region info.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
// Cancellation kind constants understood by the KMP runtime library
// (the 'cncl_kind' argument of __kmpc_cancel/__kmpc_cancellationpoint).
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

// Maps an OpenMP cancel region directive kind to the runtime's cancel kind.
// Asserts on anything other than parallel/for/sections/taskgroup.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

// Emits a '#pragma omp cancellation point' as a __kmpc_cancellationpoint call
// plus a conditional branch out of the construct when cancellation was
// requested. Only emitted when the enclosing region may actually be cancelled.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The call returns nonzero if cancellation was requested; the result is
      // checked below to branch out of the construct.
      auto *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   __kmpc_cancel_barrier();
      //   exit from construct;
      // }
      auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
      auto *ContBB = CGF.createBasicBlock(".cancel.continue");
      auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // __kmpc_cancel_barrier();
      emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      auto CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

// Emits '#pragma omp cancel' as a (possibly 'if'-guarded) __kmpc_cancel call
// plus a conditional branch out of the construct when cancellation succeeded.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      auto &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The call returns nonzero if cancellation was activated; the result is
      // checked below to branch out of the construct.
      auto *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   __kmpc_cancel_barrier();
      //   exit from construct;
      // }
      auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
      auto *ContBB = CGF.createBasicBlock(".cancel.continue");
      auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // __kmpc_cancel_barrier();
      RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      auto CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond)
      // With an 'if' clause, only the then-branch performs the cancel.
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

/// \brief Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {

  auto &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");
  assert(Loc.isFileID() && "Source location is expected to refer to a file.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  // Use the filesystem's unique ID so the (device, file) pair is stable across
  // different spellings of the same path.
  llvm::sys::fs::UniqueID ID;
  if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    llvm_unreachable("Source file with target region no longer exists!");

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

// Emits the outlined function for a '#pragma omp target' region. Thin wrapper
// around emitTargetOutlinedFunctionHelper; OutlinedFn/OutlinedFnID are
// returned through the reference parameters.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");

  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Device side: the ID must be the real entry-point address.
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
  } else
    // Host side: any unique pointer works as the region ID.
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
}

/// \brief Emit the num_teams clause of an enclosed teams directive at the
/// target region scope. If there is no teams directive associated with the
/// target directive, or if there is no num_teams clause associated with the
/// enclosed teams directive, return nullptr.
static llvm::Value *
emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
                                     CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D) {

  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
                                              "teams directive expected to be "
                                              "emitted only for the host!");

  // FIXME: For the moment we do not support combined directives with target and
  // teams, so we do not expect to get any num_teams clause in the provided
  // directive. Once we support that, this assertion can be replaced by the
  // actual emission of the clause expression.
  assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr &&
         "Not expecting clause in directive.");

  // If the current target region has a teams region enclosed, we need to get
  // the number of teams to pass to the runtime function call. This is done
  // by generating the expression in an inlined region. This is required because
  // the expression is captured in the enclosing target environment when the
  // teams directive is not combined with target.

  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());

  // FIXME: Accommodate other combined directives with teams when they become
  // available.
  if (auto *TeamsDir = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt())) {
    if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
      return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty,
                                       /*IsSigned=*/true);
    }

    // If we have an enclosed teams directive but no num_teams clause we use
    // the default value 0.
    return CGF.Builder.getInt32(0);
  }

  // No teams associated with the directive.
  return nullptr;
}

/// \brief Emit the thread_limit clause of an enclosed teams directive at the
/// target region scope. If there is no teams directive associated with the
/// target directive, or if there is no thread_limit clause associated with the
/// enclosed teams directive, return nullptr.
static llvm::Value *
emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
                                        CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D) {

  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
                                              "teams directive expected to be "
                                              "emitted only for the host!");

  // FIXME: For the moment we do not support combined directives with target and
  // teams, so we do not expect to get any thread_limit clause in the provided
  // directive. Once we support that, this assertion can be replaced by the
  // actual emission of the clause expression.
  assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr &&
         "Not expecting clause in directive.");

  // If the current target region has a teams region enclosed, we need to get
  // the thread limit to pass to the runtime function call. This is done
  // by generating the expression in an inlined region. This is required because
  // the expression is captured in the enclosing target environment when the
  // teams directive is not combined with target.

  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());

  // FIXME: Accommodate other combined directives with teams when they become
  // available.
  if (auto *TeamsDir = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt())) {
    if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
      return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
                                       /*IsSigned=*/true);
    }

    // If we have an enclosed teams directive but no thread_limit clause we use
    // the default value 0.
    return CGF.Builder.getInt32(0);
  }

  // No teams associated with the directive.
  return nullptr;
}

// Emits the host-side launch sequence for a '#pragma omp target' region:
// builds the base-pointer/pointer/size/map-type arrays for all captures,
// calls __tgt_target[_teams], and falls back to the host version when the
// offload call fails or offloading is disabled.
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Value *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device,
                                     ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;
  /// \brief Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags {
    /// \brief Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// \brief Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// \brief The element passed to the device is a pointer.
    OMP_MAP_PTR = 0x20,
    /// \brief Pass the element to the device by value.
    OMP_MAP_BYCOPY = 0x80,
  };

  enum OpenMPOffloadingReservedDeviceIDs {
    /// \brief Device ID if the device was not defined, runtime should get it
    /// from environment variables in the spec.
    OMP_DEVICEID_UNDEF = -1,
  };

  assert(OutlinedFn && "Invalid outlined function!");

  auto &Ctx = CGF.getContext();

  // Fill up the arrays with all the captured variables.
  SmallVector<llvm::Value *, 16> BasePointers;
  SmallVector<llvm::Value *, 16> Pointers;
  SmallVector<llvm::Value *, 16> Sizes;
  SmallVector<unsigned, 16> MapTypes;

  bool hasVLACaptures = false;

  // Walk captures, their record fields, and their emitted values in lockstep.
  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
  auto RI = CS.getCapturedRecordDecl()->field_begin();
  // auto II = CS.capture_init_begin();
  auto CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    // NOTE(review): Name and Ty are never used in this loop — candidates for
    // removal.
    StringRef Name;
    QualType Ty;
    llvm::Value *BasePointer;
    llvm::Value *Pointer;
    llvm::Value *Size;
    unsigned MapType;

    // VLA sizes are passed to the outlined region by copy.
    if (CI->capturesVariableArrayType()) {
      BasePointer = Pointer = *CV;
      Size = CGF.getTypeSize(RI->getType());
      // Copy to the device as an argument. No need to retrieve it.
      MapType = OMP_MAP_BYCOPY;
      hasVLACaptures = true;
    } else if (CI->capturesThis()) {
      BasePointer = Pointer = *CV;
      const PointerType *PtrTy = cast<PointerType>(RI->getType().getTypePtr());
      Size = CGF.getTypeSize(PtrTy->getPointeeType());
      // Default map type.
      MapType = OMP_MAP_TO | OMP_MAP_FROM;
    } else if (CI->capturesVariableByCopy()) {
      MapType = OMP_MAP_BYCOPY;
      if (!RI->getType()->isAnyPointerType()) {
        // If the field is not a pointer, we need to save the actual value and
        // load it as a void pointer.
        auto DstAddr = CGF.CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CI->getCapturedVar()->getName()) + ".casted");
        LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        // Reinterpret the uintptr temp as a pointer to the field type so the
        // bits can be stored with the original type...
        auto *SrcAddrVal = CGF.EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(RI->getType()), SourceLocation());
        LValue SrcLV =
            CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, RI->getType());

        // Store the value using the source type pointer.
        CGF.EmitStoreThroughLValue(RValue::get(*CV), SrcLV);

        // Load the value using the destination type pointer.
        BasePointer = Pointer =
            CGF.EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
      } else {
        MapType |= OMP_MAP_PTR;
        BasePointer = Pointer = *CV;
      }
      Size = CGF.getTypeSize(RI->getType());
    } else {
      assert(CI->capturesVariable() && "Expected captured reference.");
      BasePointer = Pointer = *CV;

      const ReferenceType *PtrTy =
          cast<ReferenceType>(RI->getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      Size = CGF.getTypeSize(ElementType);
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate type,
      // the default is 'tofrom'.
      MapType = ElementType->isAggregateType() ?
                    (OMP_MAP_TO | OMP_MAP_FROM)
                    : OMP_MAP_TO;
      if (ElementType->isAnyPointerType())
        MapType |= OMP_MAP_PTR;
    }

    BasePointers.push_back(BasePointer);
    Pointers.push_back(Pointer);
    Sizes.push_back(Size);
    MapTypes.push_back(MapType);
  }

  // Keep track of whether the host function has to be executed: 0 means the
  // offload succeeded, nonzero means run the host fallback.
  auto OffloadErrorQType =
      Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
  auto OffloadError = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
      OffloadErrorQType);
  CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
                        OffloadError);

  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes,
                    hasVLACaptures, Device, OutlinedFnID, OffloadError,
                    OffloadErrorQType,
                    &D](CodeGenFunction &CGF, PrePostActionTy &) {
    auto &RT = CGF.CGM.getOpenMPRuntime();
    unsigned PointerNumVal = BasePointers.size();
    llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal);
    llvm::Value *BasePointersArray;
    llvm::Value *PointersArray;
    llvm::Value *SizesArray;
    llvm::Value *MapTypesArray;

    if (PointerNumVal) {
      llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true);
      QualType PointerArrayType = Ctx.getConstantArrayType(
          Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
          /*IndexTypeQuals=*/0);

      BasePointersArray =
          CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
      PointersArray =
          CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

      // If we don't have any VLA types, we can use a constant array for the map
      // sizes, otherwise we need to fill up the arrays as we do for the
      // pointers.
      if (hasVLACaptures) {
        // Runtime-sized captures: sizes go into a stack array filled below.
        QualType SizeArrayType = Ctx.getConstantArrayType(
            Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
            /*IndexTypeQuals=*/0);
        SizesArray =
            CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
      } else {
        // We expect all the sizes to be constant, so we collect them to create
        // a constant array.
        SmallVector<llvm::Constant *, 16> ConstSizes;
        for (auto S : Sizes)
          ConstSizes.push_back(cast<llvm::Constant>(S));

        auto *SizesArrayInit = llvm::ConstantArray::get(
            llvm::ArrayType::get(CGF.CGM.SizeTy, ConstSizes.size()),
            ConstSizes);
        auto *SizesArrayGbl = new llvm::GlobalVariable(
            CGF.CGM.getModule(), SizesArrayInit->getType(),
            /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
            SizesArrayInit, ".offload_sizes");
        SizesArrayGbl->setUnnamedAddr(true);
        SizesArray = SizesArrayGbl;
      }

      // The map types are always constant so we don't need to generate code to
      // fill arrays. Instead, we create an array constant.
      llvm::Constant *MapTypesArrayInit =
          llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
      auto *MapTypesArrayGbl = new llvm::GlobalVariable(
          CGF.CGM.getModule(), MapTypesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          MapTypesArrayInit, ".offload_maptypes");
      MapTypesArrayGbl->setUnnamedAddr(true);
      MapTypesArray = MapTypesArrayGbl;

      // Store each capture's base pointer, pointer and (if needed) size into
      // the corresponding array slot. Integer captures are passed through an
      // inttoptr so every slot is an i8*.
      for (unsigned i = 0; i < PointerNumVal; ++i) {
        llvm::Value *BPVal = BasePointers[i];
        if (BPVal->getType()->isPointerTy())
          BPVal = CGF.Builder.CreateBitCast(BPVal, CGF.VoidPtrTy);
        else {
          assert(BPVal->getType()->isIntegerTy() &&
                 "If not a pointer, the value type must be an integer.");
          BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGF.VoidPtrTy);
        }
        llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGF.VoidPtrTy, PointerNumVal),
            BasePointersArray, 0, i);
        Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
        CGF.Builder.CreateStore(BPVal, BPAddr);

        llvm::Value *PVal = Pointers[i];
        if (PVal->getType()->isPointerTy())
          PVal = CGF.Builder.CreateBitCast(PVal, CGF.VoidPtrTy);
        else {
          assert(PVal->getType()->isIntegerTy() &&
                 "If not a pointer, the value type must be an integer.");
          PVal = CGF.Builder.CreateIntToPtr(PVal, CGF.VoidPtrTy);
        }
        llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGF.VoidPtrTy, PointerNumVal), PointersArray,
            0, i);
        Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
        CGF.Builder.CreateStore(PVal, PAddr);

        if (hasVLACaptures) {
          llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
              llvm::ArrayType::get(CGF.SizeTy, PointerNumVal), SizesArray,
              /*Idx0=*/0,
              /*Idx1=*/i);
          Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
          CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(
                                      Sizes[i], CGF.SizeTy,
                                      /*isSigned=*/true),
                                  SAddr);
        }
      }

      // Decay the arrays to pointers to their first elements for the call.
      BasePointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGF.VoidPtrTy, PointerNumVal), BasePointersArray,
          /*Idx0=*/0, /*Idx1=*/0);
      PointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGF.VoidPtrTy, PointerNumVal), PointersArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
      SizesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGF.SizeTy, PointerNumVal), SizesArray,
          /*Idx0=*/0, /*Idx1=*/0);
      MapTypesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGF.Int32Ty, PointerNumVal), MapTypesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);

    } else {
      // No captures: pass null arrays to the runtime.
      BasePointersArray = llvm::ConstantPointerNull::get(CGF.VoidPtrPtrTy);
      PointersArray = llvm::ConstantPointerNull::get(CGF.VoidPtrPtrTy);
      SizesArray = llvm::ConstantPointerNull::get(CGF.SizeTy->getPointerTo());
      MapTypesArray =
          llvm::ConstantPointerNull::get(CGF.Int32Ty->getPointerTo());
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int32Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D);
    auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D);

    // If we have NumTeams defined this means that we have an enclosed teams
    // region. Therefore we also expect to have ThreadLimit defined. These two
    // values should be defined in the presence of a teams directive, regardless
    // of having any clauses associated. If the user is using teams but no
    // clauses, these two values will be the default that should be passed to
    // the runtime library - a 32-bit integer with the value zero.
    if (NumTeams) {
      assert(ThreadLimit && "Thread limit expression should be available along "
                            "with number of teams.");
      llvm::Value *OffloadingArgs[] = {
          DeviceID,          OutlinedFnID,  PointerNum,
          BasePointersArray, PointersArray, SizesArray,
          MapTypesArray,     NumTeams,      ThreadLimit};
      Return = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {
          DeviceID,      OutlinedFnID, PointerNum, BasePointersArray,
          PointersArray, SizesArray,   MapTypesArray};
      Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target),
                                   OffloadingArgs);
    }

    CGF.EmitStoreOfScalar(Return, OffloadError);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) {
    // Force the host fallback path by storing a nonzero "error" value.
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u),
                          OffloadError);
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond)
      emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
    else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(ElseGen);
    ElseRCG(CGF);
  }

  // Check the error code and execute the host version if required.
  auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
  auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
  auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
  auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
  CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

  CGF.EmitBlock(OffloadFailedBlock);
  CGF.Builder.CreateCall(OutlinedFn, BasePointers);
  CGF.EmitBranch(OffloadContBlock);

  CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
}

// Recursively walks a statement tree looking for target directives; each one
// found is emitted as an outlined offload-entry function under ParentName.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // If we find an OMP target directive, codegen the outline function and
  // register the result.
  // FIXME: Add other directives with target when they become supported.
  bool isTargetDirective = isa<OMPTargetDirective>(S);

  if (isTargetDirective) {
    auto *E = cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    llvm::Function *Fn;
    llvm::Constant *Addr;
    std::tie(Fn, Addr) =
        CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
            CGM, cast<OMPTargetDirective>(*E), ParentName,
            /*isOffloadEntry=*/true);
    assert(Fn && Addr && "Target region emission failed.");
    return;
  }

  // Other OpenMP directives: recurse into the captured statement only.
  if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(),
        ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (auto *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

// Device-side hook invoked for each function: emits only the target regions
// inside FD. Returns true when the caller must skip normal emission of FD.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  auto &FD = *cast<FunctionDecl>(GD.getDecl());

  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Try to detect target regions in the function.
  scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD));

  // We should not emit any function other than the ones created during the
  // scanning.
  // Therefore, we signal that this function is completely dealt
  // with.
  return true;
}

// Device-side hook for global variables: scans the type's ctors/dtors for
// target regions. Returns true (GD handled) since globals are not emitted on
// the device yet.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (auto *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    auto *Dtor = RD->getDestructor();
    if (Dtor) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // If we are in target mode we do not emit any global (declare target is not
  // implemented yet). Therefore we signal that GD was processed in this case.
  return true;
}

// Dispatches a global to the function or variable handler above.
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  auto *VD = GD.getDecl();
  if (isa<FunctionDecl>(VD))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

// Emits the offload-entry metadata and the module's offloading descriptor
// registration function (may be null when there is nothing to register).
llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
  // If we have offloading in the current module, we need to emit the entries
  // now and register the offloading descriptor.
  createOffloadEntriesAndInfoMetadata();

  // Create and register the offloading binary descriptors. This is the main
  // entity that captures all the information about offloading in the current
  // compilation unit.
  return createOffloadingBinaryDescriptorRegistration();
}

// Emits a __kmpc_fork_teams call launching OutlinedFn with CapturedVars for a
// '#pragma omp teams' construct.
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Value *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  auto *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

// Emits a __kmpc_push_num_teams call for the num_teams/thread_limit clauses
// of a teams directive; absent clauses are passed as 0 (runtime default).
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  auto *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      (NumTeams)
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      (ThreadLimit)
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
                      PushNumTeamsArgs);
}