//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;

namespace {
/// \brief Base class for handling code generation inside OpenMP regions.
/// Subclasses carry the region-kind-specific state (thread-id variable,
/// helper name, etc.); this base holds the pieces common to every region:
/// the code-generation callback, the directive kind, and the cancel flag.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// \brief Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// \brief Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// \brief Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// \brief Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// \brief Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// \brief Build region info for an outlined region backed by a captured
  /// statement \p CS.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// \brief Build region info without a captured statement (used for
  /// inlined regions, which delegate capture queries to the outer region).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// \brief Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// \brief Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// \brief Which flavor of region this is (see CGOpenMPRegionKind).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// \brief The OpenMP directive this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// \brief Whether the associated directive has a 'cancel' construct inside.
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

protected:
  CGOpenMPRegionKind RegionKind;
  /// \brief Callback that emits the body of the region.
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// \brief API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }
  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// \brief A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
};

/// \brief API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }
  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// \brief Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// \brief A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
};

/// \brief API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
  // \brief Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }
  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }
  /// \brief Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }
  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }
  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

private:
  /// \brief CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// \brief OldCSI downcast to a region info, or null if the outer capture
  /// info is not an OpenMP region.
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// \brief API for captured statement code generation in OpenMP target
/// constructs. For this captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// \brief This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

/// \brief RAII for emitting code of OpenMP constructs.
/// Installs a CGOpenMPInlinedRegionInfo as the function's CapturedStmtInfo on
/// construction and restores the previous one on destruction.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;

public:
  /// \brief Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
  }
  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
  }
};

} // anonymous namespace

/// \brief Load the pointer stored at \p PtrAddr and wrap the pointee in an
/// LValue with its natural alignment.
static LValue emitLoadOfPointerLValue(CodeGenFunction &CGF, Address PtrAddr,
                                      QualType Ty) {
  AlignmentSource Source;
  CharUnits Align = CGF.getNaturalPointeeTypeAlignment(Ty, &Source);
  return CGF.MakeAddrLValue(Address(CGF.Builder.CreateLoad(PtrAddr), Align),
                            Ty->getPointeeType(), Source);
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  // The thread-id variable is a kmp_int32* parameter; load through it.
  return emitLoadOfPointerLValue(CGF,
                                 CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                                 getThreadIDVariable()->getType());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // Enforce the no-exceptions-escape rule by bracketing the body with a
  // terminate scope.
  CGF.EHStack.pushTerminate();
  {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    CodeGen(CGF);
  }
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  // For tasks the thread id is passed by value (kmp_int32), not by pointer.
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr),
      OffloadEntriesInfoManager(CGM) {
  // Mirror the libomp ident_t struct layout: 4 x i32 followed by the psource
  // string pointer.
  IdentTy = llvm::StructType::create(
      "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
      CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
      CGM.Int8PtrTy /* psource */, nullptr);
  // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
  llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                               llvm::PointerType::getUnqual(CGM.Int32Ty)};
  Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
}

// Layout information for ident_t.
static CharUnits getIdentAlign(CodeGenModule &CGM) {
  return CGM.getPointerAlign();
}
static CharUnits getIdentSize(CodeGenModule &CGM) {
  // Four i32 fields (16 bytes) plus the psource pointer; the assert checks
  // the i32 block is suitably aligned for the trailing pointer.
  assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
  return CharUnits::fromQuantity(16) + CGM.getPointerSize();
}
static CharUnits getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field) {
  // All the fields except the last are i32, so this works beautifully.
  return unsigned(Field) * CharUnits::fromQuantity(4);
}
/// \brief GEP to field \p Field of an ident_t at \p Addr.
static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
                                   CGOpenMPRuntime::IdentFieldIndex Field,
                                   const llvm::Twine &Name = "") {
  auto Offset = getOffsetOfIdentField(Field);
  return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
}

llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
  CodeGenFunction CGF(CGM, true);
  // 'cancel' is only relevant for the parallel flavors that can carry it.
  bool HasCancel = false;
  if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}

llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        cast<OMPTaskDirective>(D).hasCancel());
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateCapturedStmtFunction(*CS);
}

Address
CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
  CharUnits Align = getIdentAlign(CGM);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }
    auto DefaultOpenMPLocation = new llvm::GlobalVariable(
        CGM.getModule(), IdentTy, /*isConstant*/ true,
        llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
    DefaultOpenMPLocation->setUnnamedAddr(true);
    DefaultOpenMPLocation->setAlignment(Align.getQuantity());

    llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
    llvm::Constant *Values[] = {Zero,
                                llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                                Zero, Zero, DefaultOpenMPPSource};
    llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
    DefaultOpenMPLocation->setInitializer(Init);
    // Cache per-flags so the same constant location is reused.
    OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 OpenMPLocationFlags Flags) {
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
                                      ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Seed the alloca from the default location in the entry block so every
    // use in the function sees an initialized ident_t.
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGM.getSize(getIdentSize(CGF.CGM)));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);

  auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const FunctionDecl *FD =
            dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
      OS2 << FD->getQualifiedNameAsString();
    }
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.Builder.CreateStore(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
      // If value loaded in entry block, cache it and use it everywhere in
      // function.
      if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
        auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
        Elem.second.ThreadID = ThreadID;
      }
      return ThreadID;
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
506 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 507 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 508 ThreadID = 509 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 510 emitUpdateLocation(CGF, Loc)); 511 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 512 Elem.second.ThreadID = ThreadID; 513 return ThreadID; 514 } 515 516 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 517 assert(CGF.CurFn && "No function in current CodeGenFunction."); 518 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 519 OpenMPLocThreadIDMap.erase(CGF.CurFn); 520 } 521 522 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 523 return llvm::PointerType::getUnqual(IdentTy); 524 } 525 526 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 527 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 528 } 529 530 llvm::Constant * 531 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { 532 llvm::Constant *RTLFn = nullptr; 533 switch (Function) { 534 case OMPRTL__kmpc_fork_call: { 535 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 536 // microtask, ...); 537 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 538 getKmpc_MicroPointerTy()}; 539 llvm::FunctionType *FnTy = 540 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 541 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 542 break; 543 } 544 case OMPRTL__kmpc_global_thread_num: { 545 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 546 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 547 llvm::FunctionType *FnTy = 548 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 549 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 550 break; 551 } 552 case OMPRTL__kmpc_threadprivate_cached: { 553 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 554 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 555 llvm::Type *TypeParams[] = 
{getIdentTyPointerTy(), CGM.Int32Ty, 556 CGM.VoidPtrTy, CGM.SizeTy, 557 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 558 llvm::FunctionType *FnTy = 559 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 560 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 561 break; 562 } 563 case OMPRTL__kmpc_critical: { 564 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 565 // kmp_critical_name *crit); 566 llvm::Type *TypeParams[] = { 567 getIdentTyPointerTy(), CGM.Int32Ty, 568 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 569 llvm::FunctionType *FnTy = 570 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 571 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 572 break; 573 } 574 case OMPRTL__kmpc_critical_with_hint: { 575 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 576 // kmp_critical_name *crit, uintptr_t hint); 577 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 578 llvm::PointerType::getUnqual(KmpCriticalNameTy), 579 CGM.IntPtrTy}; 580 llvm::FunctionType *FnTy = 581 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 582 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); 583 break; 584 } 585 case OMPRTL__kmpc_threadprivate_register: { 586 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 587 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 588 // typedef void *(*kmpc_ctor)(void *); 589 auto KmpcCtorTy = 590 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 591 /*isVarArg*/ false)->getPointerTo(); 592 // typedef void *(*kmpc_cctor)(void *, void *); 593 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 594 auto KmpcCopyCtorTy = 595 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 596 /*isVarArg*/ false)->getPointerTo(); 597 // typedef void (*kmpc_dtor)(void *); 598 auto KmpcDtorTy = 599 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 
/*isVarArg*/ false) 600 ->getPointerTo(); 601 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 602 KmpcCopyCtorTy, KmpcDtorTy}; 603 auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 604 /*isVarArg*/ false); 605 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 606 break; 607 } 608 case OMPRTL__kmpc_end_critical: { 609 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 610 // kmp_critical_name *crit); 611 llvm::Type *TypeParams[] = { 612 getIdentTyPointerTy(), CGM.Int32Ty, 613 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 614 llvm::FunctionType *FnTy = 615 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 616 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 617 break; 618 } 619 case OMPRTL__kmpc_cancel_barrier: { 620 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 621 // global_tid); 622 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 623 llvm::FunctionType *FnTy = 624 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 625 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 626 break; 627 } 628 case OMPRTL__kmpc_barrier: { 629 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 630 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 631 llvm::FunctionType *FnTy = 632 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 633 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 634 break; 635 } 636 case OMPRTL__kmpc_for_static_fini: { 637 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 638 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 639 llvm::FunctionType *FnTy = 640 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 641 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 642 break; 643 } 644 case OMPRTL__kmpc_push_num_threads: { 645 // Build 
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 646 // kmp_int32 num_threads) 647 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 648 CGM.Int32Ty}; 649 llvm::FunctionType *FnTy = 650 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 651 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 652 break; 653 } 654 case OMPRTL__kmpc_serialized_parallel: { 655 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 656 // global_tid); 657 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 658 llvm::FunctionType *FnTy = 659 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 660 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 661 break; 662 } 663 case OMPRTL__kmpc_end_serialized_parallel: { 664 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 665 // global_tid); 666 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 667 llvm::FunctionType *FnTy = 668 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 669 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 670 break; 671 } 672 case OMPRTL__kmpc_flush: { 673 // Build void __kmpc_flush(ident_t *loc); 674 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 675 llvm::FunctionType *FnTy = 676 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 677 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 678 break; 679 } 680 case OMPRTL__kmpc_master: { 681 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 682 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 683 llvm::FunctionType *FnTy = 684 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 685 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 686 break; 687 } 688 case OMPRTL__kmpc_end_master: { 689 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 690 llvm::Type 
*TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 691 llvm::FunctionType *FnTy = 692 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 693 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 694 break; 695 } 696 case OMPRTL__kmpc_omp_taskyield: { 697 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 698 // int end_part); 699 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 700 llvm::FunctionType *FnTy = 701 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 702 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 703 break; 704 } 705 case OMPRTL__kmpc_single: { 706 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 707 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 708 llvm::FunctionType *FnTy = 709 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 710 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 711 break; 712 } 713 case OMPRTL__kmpc_end_single: { 714 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 715 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 716 llvm::FunctionType *FnTy = 717 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 718 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 719 break; 720 } 721 case OMPRTL__kmpc_omp_task_alloc: { 722 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 723 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 724 // kmp_routine_entry_t *task_entry); 725 assert(KmpRoutineEntryPtrTy != nullptr && 726 "Type kmp_routine_entry_t must be created."); 727 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 728 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 729 // Return void * and then cast to particular kmp_task_t type. 
730 llvm::FunctionType *FnTy = 731 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 732 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 733 break; 734 } 735 case OMPRTL__kmpc_omp_task: { 736 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 737 // *new_task); 738 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 739 CGM.VoidPtrTy}; 740 llvm::FunctionType *FnTy = 741 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 742 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 743 break; 744 } 745 case OMPRTL__kmpc_copyprivate: { 746 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 747 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 748 // kmp_int32 didit); 749 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 750 auto *CpyFnTy = 751 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 752 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 753 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 754 CGM.Int32Ty}; 755 llvm::FunctionType *FnTy = 756 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 757 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 758 break; 759 } 760 case OMPRTL__kmpc_reduce: { 761 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 762 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 763 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 764 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 765 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 766 /*isVarArg=*/false); 767 llvm::Type *TypeParams[] = { 768 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 769 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 770 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 771 llvm::FunctionType *FnTy = 772 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 773 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 774 break; 775 } 776 case OMPRTL__kmpc_reduce_nowait: { 777 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 778 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 779 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 780 // *lck); 781 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 782 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 783 /*isVarArg=*/false); 784 llvm::Type *TypeParams[] = { 785 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 786 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 787 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 788 llvm::FunctionType *FnTy = 789 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 790 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 791 break; 792 } 793 case OMPRTL__kmpc_end_reduce: { 794 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 795 // kmp_critical_name *lck); 796 llvm::Type *TypeParams[] = { 797 getIdentTyPointerTy(), CGM.Int32Ty, 798 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 799 llvm::FunctionType *FnTy = 800 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 801 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 802 break; 803 } 804 case OMPRTL__kmpc_end_reduce_nowait: { 805 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 806 // kmp_critical_name *lck); 807 llvm::Type *TypeParams[] = { 808 getIdentTyPointerTy(), CGM.Int32Ty, 809 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 810 llvm::FunctionType *FnTy = 811 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 812 RTLFn = 813 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 814 break; 815 } 816 case OMPRTL__kmpc_omp_task_begin_if0: { 817 
// Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 818 // *new_task); 819 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 820 CGM.VoidPtrTy}; 821 llvm::FunctionType *FnTy = 822 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 823 RTLFn = 824 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 825 break; 826 } 827 case OMPRTL__kmpc_omp_task_complete_if0: { 828 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 829 // *new_task); 830 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 831 CGM.VoidPtrTy}; 832 llvm::FunctionType *FnTy = 833 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 834 RTLFn = CGM.CreateRuntimeFunction(FnTy, 835 /*Name=*/"__kmpc_omp_task_complete_if0"); 836 break; 837 } 838 case OMPRTL__kmpc_ordered: { 839 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 840 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 841 llvm::FunctionType *FnTy = 842 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 843 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 844 break; 845 } 846 case OMPRTL__kmpc_end_ordered: { 847 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 848 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 849 llvm::FunctionType *FnTy = 850 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 851 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 852 break; 853 } 854 case OMPRTL__kmpc_omp_taskwait: { 855 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 856 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 857 llvm::FunctionType *FnTy = 858 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 859 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 860 break; 861 } 862 case OMPRTL__kmpc_taskgroup: { 863 // Build void __kmpc_taskgroup(ident_t 
*loc, kmp_int32 global_tid); 864 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 865 llvm::FunctionType *FnTy = 866 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 867 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 868 break; 869 } 870 case OMPRTL__kmpc_end_taskgroup: { 871 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 872 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 873 llvm::FunctionType *FnTy = 874 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 875 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 876 break; 877 } 878 case OMPRTL__kmpc_push_proc_bind: { 879 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 880 // int proc_bind) 881 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 882 llvm::FunctionType *FnTy = 883 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 884 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 885 break; 886 } 887 case OMPRTL__kmpc_omp_task_with_deps: { 888 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 889 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 890 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 891 llvm::Type *TypeParams[] = { 892 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 893 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 894 llvm::FunctionType *FnTy = 895 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 896 RTLFn = 897 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 898 break; 899 } 900 case OMPRTL__kmpc_omp_wait_deps: { 901 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 902 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 903 // kmp_depend_info_t *noalias_dep_list); 904 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 905 
CGM.Int32Ty, CGM.VoidPtrTy, 906 CGM.Int32Ty, CGM.VoidPtrTy}; 907 llvm::FunctionType *FnTy = 908 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 909 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 910 break; 911 } 912 case OMPRTL__kmpc_cancellationpoint: { 913 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 914 // global_tid, kmp_int32 cncl_kind) 915 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 916 llvm::FunctionType *FnTy = 917 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 918 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 919 break; 920 } 921 case OMPRTL__kmpc_cancel: { 922 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 923 // kmp_int32 cncl_kind) 924 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 925 llvm::FunctionType *FnTy = 926 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 927 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 928 break; 929 } 930 case OMPRTL__tgt_target: { 931 // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t 932 // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t 933 // *arg_types); 934 llvm::Type *TypeParams[] = {CGM.Int32Ty, 935 CGM.VoidPtrTy, 936 CGM.Int32Ty, 937 CGM.VoidPtrPtrTy, 938 CGM.VoidPtrPtrTy, 939 CGM.SizeTy->getPointerTo(), 940 CGM.Int32Ty->getPointerTo()}; 941 llvm::FunctionType *FnTy = 942 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 943 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 944 break; 945 } 946 case OMPRTL__tgt_register_lib: { 947 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 948 QualType ParamTy = 949 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 950 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 951 llvm::FunctionType *FnTy = 952 llvm::FunctionType::get(CGM.Int32Ty, 
                                TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
    break;
  }
  case OMPRTL__tgt_unregister_lib: {
    // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
    // NOTE(review): the comment above (and the one for __tgt_register_lib)
    // says the entry returns void, but the type built below returns
    // CGM.Int32Ty - confirm against the offload runtime's declaration.
    QualType ParamTy =
        CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
    llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
    break;
  }
  }
  return RTLFn;
}

/// \brief Returns a declaration of the static-scheduling init entry point
/// matching the loop induction variable's size and signedness:
///   void __kmpc_for_static_init_{4,4u,8,8u}(ident_t *loc, kmp_int32 tid,
///       kmp_int32 schedtype, kmp_int32 *p_lastiter, ITy *p_lower,
///       ITy *p_upper, ITy *p_stride, ITy incr, ITy chunk);
llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
                                                             bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  // Suffix encodes the IV width in bytes (4/8) plus 'u' for unsigned IVs.
  auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                       : "__kmpc_for_static_init_4u")
                           : (IVSigned ? "__kmpc_for_static_init_8"
                                       : "__kmpc_for_static_init_8u");
  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    CGM.Int32Ty,                               // schedtype
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy,                                     // p_stride
    ITy,                                       // incr
    ITy                                        // chunk
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// \brief Returns a declaration of the dynamic-scheduling init entry point
/// __kmpc_dispatch_init_{4,4u,8,8u} matching the IV size/signedness.
llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// \brief Returns a declaration of __kmpc_dispatch_fini_{4,4u,8,8u}, called
/// by a thread when it finishes a chunk of a dynamically scheduled loop.
llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// \brief Returns a declaration of __kmpc_dispatch_next_{4,4u,8,8u}, which
/// fetches the next chunk's bounds for a dynamically scheduled loop and
/// returns an i32 status (loop continues while non-zero).
llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  auto Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy                                      // p_stride
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// \brief Returns the per-variable "<mangled-name>.cache." global used by
/// __kmpc_threadprivate_cached. Only meaningful when real TLS is not used.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
                                     Twine(CGM.getMangledName(VD)) + ".cache.");
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  // With real TLS the variable's own address is already thread-local.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  // Otherwise ask the runtime for this thread's copy:
  // __kmpc_threadprivate_cached(loc, gtid, &var, size, &<var>.cache.).
  auto VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
                 VDAddr.getAlignment());
}

/// \brief Emits the registration of a threadprivate variable's ctor, copy
/// ctor and dtor with the OpenMP runtime via
/// __kmpc_threadprivate_register.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  auto OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {OMPLoc,
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.VoidPtrTy),
                         Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // When real TLS is available no runtime registration is emitted here.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit the ctor/dtor registration at most once per variable definition.
  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
    ThreadPrivateWithDefinition.insert(VD);
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    auto Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD.
      // Signature: void *.__kmpc_global_ctor_.(void *dst)
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
          /*isVariadic=*/false);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_ctor_.", FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, SourceLocation());
      // The single void* argument is the address of this thread's copy;
      // construct the initializer directly into it.
      auto ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
                                                 CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the same pointer that was passed in.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD.
      // Signature: void .__kmpc_global_dtor_.(void *dst)
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
          CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
          /*isVariadic=*/false);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_dtor_.", FI, Loc);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            SourceLocation());
      auto ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto CopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                /*isVarArg=*/false)->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      // The runtime expects a correctly typed null when no ctor is needed.
      auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function context: wrap the registration in a dedicated
      // global initializer function and hand it back to the caller.
      auto InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, ".__omp_threadprivate_init_.",
          CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
/// function. Here is the logic:
/// if (Cond) {
///   ThenGen();
/// } else {
///   ElseGen();
/// }
static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
                            const RegionCodeGenTy &ThenGen,
                            const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    if (CondConstant) {
      ThenGen(CGF);
    } else {
      ElseGen(CGF);
    }
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  auto ThenBlock = CGF.createBasicBlock("omp_if.then");
  auto ElseBlock = CGF.createBasicBlock("omp_if.else");
  auto ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  {
    CodeGenFunction::RunCleanupsScope ThenScope(CGF);
    ThenGen(CGF);
  }
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  {
    // There is no need to emit line number for unconditional branch.
    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ElseBlock);
  }
  {
    // NOTE(review): this scope guards the 'else' arm; the name 'ThenScope'
    // looks like a copy-paste misnomer (behavior is unaffected).
    CodeGenFunction::RunCleanupsScope ThenScope(CGF);
    ElseGen(CGF);
  }
  {
    // There is no need to emit line number for unconditional branch.
    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBranch(ContBlock);
  }
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Value *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  auto *RTLoc = emitUpdateLocation(CGF, Loc);
  // Parallel path: hand the outlined function to the runtime to fork.
  auto &&ThenGen = [this, OutlinedFn, CapturedVars,
                    RTLoc](CodeGenFunction &CGF) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path (if-clause false): run the outlined function inline on
  // the current thread, bracketed by the serialized-parallel runtime calls.
  auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc,
                    Loc](CodeGenFunction &CGF) {
    auto ThreadID = getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&gtid, &zero, CapturedStruct);
    auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
    Address ZeroAddr =
        CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
                             /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
  };
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    ThenGen(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined
// function as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel
// region, but in regular serial code region, get thread ID by calling
// kmp_int32 kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
// temporary and return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();

  auto ThreadID = getThreadID(CGF, Loc);
  auto Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

/// \brief Returns the module-level internal global with the given type and
/// name, creating a zero-initialized common-linkage one on first request.
llvm::Constant *
CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
                                             const llvm::Twine &Name) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  auto RuntimeName = Out.str();
  // insert() returns the pre-existing entry if the name was already created.
  auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }
  return Elem.second = new llvm::GlobalVariable(
      CGM.getModule(), Ty, /*IsConstant*/ false,
      llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
      Elem.first());
}

/// \brief Returns the shared lock variable for a named critical section:
/// one ".gomp_critical_user_<name>.var" global per critical name.
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  llvm::Twine Name(".gomp_critical_user_", CriticalName);
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
}

namespace {
/// \brief Cleanup that emits a runtime call with a fixed-size argument list
/// when the enclosing scope is exited (normally or during EH unwinding).
template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup {
  llvm::Value *Callee;
  llvm::Value *Args[N];

public:
  CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs)
      : Callee(Callee) {
    assert(CleanupArgs.size() == N);
    std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(Callee, Args);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  if (Hint) {
    llvm::SmallVector<llvm::Value *, 8> ArgsWithHint(std::begin(Args),
                                                     std::end(Args));
    auto *HintVal = CGF.EmitScalarExpr(Hint);
    ArgsWithHint.push_back(
        CGF.Builder.CreateIntCast(HintVal, CGM.IntPtrTy, /*isSigned=*/false));
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical_with_hint),
                        ArgsWithHint);
  } else
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
  // Build a call to __kmpc_end_critical; the cleanup runs on both the normal
  // exit path and EH unwinding so the lock is always released.
  CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
      NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
      llvm::makeArrayRef(Args));
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// \brief Emits "if (IfCond) { BodyOpGen(); }" for an inlined OpenMP region,
/// branching around the body when the runtime test is false.
static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
                       OpenMPDirectiveKind Kind, SourceLocation Loc,
                       const RegionCodeGenTy &BodyOpGen) {
  llvm::Value *CallBool = CGF.EmitScalarConversion(
      IfCond,
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
      CGF.getContext().BoolTy, Loc);

  auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
  auto *ContBlock = CGF.createBasicBlock("omp_if.end");
  // Generate the branch (If-stmt)
  CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
  CGF.EmitBlock(ThenBlock);
  CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen);
  // Emit the rest of bblocks/branches
  CGF.EmitBranch(ContBlock);
  CGF.EmitBlock(ContBlock, true);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  auto *IsMaster =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
  typedef CallEndCleanup<std::extent<decltype(Args)>::value>
      MasterCallEndCleanup;
  emitIfStmt(
      CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void {
        CodeGenFunction::RunCleanupsScope Scope(CGF);
        // __kmpc_end_master only runs on the thread that entered the region.
        CGF.EHStack.pushCleanup<MasterCallEndCleanup>(
            NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
            llvm::makeArrayRef(Args));
        MasterOpGen(CGF);
      });
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args);
    // Build a call to __kmpc_end_taskgroup
    CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
        NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
        llvm::makeArrayRef(Args));
    emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
  }
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr =
      CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  // Re-type the loaded pointer with the variable's declared alignment/type.
  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// \brief Emits the helper passed to __kmpc_copyprivate:
///   void .omp.copyprivate.copy_func(void *LHSArg, void *RHSArg);
/// Each argument is a pointer to an array of void* element addresses; the
/// helper copies element I from the RHS array to the LHS array using
/// AssignmentOps[I].
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
  auto &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  FunctionType::ExtInfo EI;
  auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
      C.VoidTy, Args, EI, /*isVariadic=*/false);
  auto *Fn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
      ".omp.copyprivate.copy_func", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  auto &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  auto *IsSingle =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
  typedef CallEndCleanup<std::extent<decltype(Args)>::value>
      SingleCallEndCleanup;
  emitIfStmt(
      CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void {
        CodeGenFunction::RunCleanupsScope Scope(CGF);
        CGF.EHStack.pushCleanup<SingleCallEndCleanup>(
            NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
            llvm::makeArrayRef(Args));
        SingleOpGen(CGF);
        if (DidIt.isValid()) {
          // did_it = 1;
          CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
        }
      });
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    auto CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(
          CopyprivateList, I, CGF.getPointerSize());
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all
    // other threads in the corresponding parallel region.
    // NOTE(review): SrcExprs/DstExprs are passed into the helper's
    // DestExprs/SrcExprs parameters respectively - confirm this is the
    // intended mapping of the copyprivate clause's pseudo-variables.
    auto *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
    auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
    // Build a call to __kmpc_end_ordered
    CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
        NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
        llvm::makeArrayRef(Args));
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call
__kmpc_barrier(loc, thread_id); 1683 OpenMPLocationFlags Flags = OMP_IDENT_KMPC; 1684 if (Kind == OMPD_for) { 1685 Flags = 1686 static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR); 1687 } else if (Kind == OMPD_sections) { 1688 Flags = static_cast<OpenMPLocationFlags>(Flags | 1689 OMP_IDENT_BARRIER_IMPL_SECTIONS); 1690 } else if (Kind == OMPD_single) { 1691 Flags = 1692 static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE); 1693 } else if (Kind == OMPD_barrier) { 1694 Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL); 1695 } else { 1696 Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL); 1697 } 1698 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 1699 // thread_id); 1700 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 1701 getThreadID(CGF, Loc)}; 1702 if (auto *OMPRegionInfo = 1703 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1704 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 1705 auto *Result = CGF.EmitRuntimeCall( 1706 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 1707 if (EmitChecks) { 1708 // if (__kmpc_cancel_barrier()) { 1709 // exit from construct; 1710 // } 1711 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 1712 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 1713 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 1714 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 1715 CGF.EmitBlock(ExitBB); 1716 // exit from construct; 1717 auto CancelDestination = 1718 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 1719 CGF.EmitBranchThroughCleanup(CancelDestination); 1720 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 1721 } 1722 return; 1723 } 1724 } 1725 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 1726 } 1727 1728 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from 1729 /// the enum sched_type in kmp.h). 
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
};

/// \brief Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    // No schedule clause: fall back to the (non-chunked) static schedule.
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// \brief Return true iff the schedule maps to the unordered, non-chunked
/// static runtime schedule.
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

/// \brief Return true iff the schedule requires the dynamic dispatch codegen
/// path (anything other than plain static).
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  auto Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// \brief Emit the __kmpc_dispatch_init_* call for dynamically scheduled
/// (or ordered) worksharing loops.
void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPScheduleClauseKind ScheduleKind,
                                          unsigned IVSize, bool IVSigned,
                                          bool Ordered, llvm::Value *UB,
                                          llvm::Value *Chunk) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  if (Chunk == nullptr)
    Chunk = CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(Schedule), // Schedule type
      CGF.Builder.getIntN(IVSize, 0), // Lower
      UB,                             // Upper
      CGF.Builder.getIntN(IVSize, 1), // Stride
      Chunk                           // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// \brief Emit the __kmpc_for_static_init_* call for statically scheduled
/// worksharing loops. IL/LB/UB/ST are out-parameters filled by the runtime.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPScheduleClauseKind ScheduleKind,
                                        unsigned IVSize, bool IVSigned,
                                        bool Ordered, Address IL, Address LB,
                                        Address UB, Address ST,
                                        llvm::Value *Chunk) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
  assert(!Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_ord_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(Schedule), // Schedule type
      IL.getPointer(),                // &isLastIter
      LB.getPointer(),                // &LB
      UB.getPointer(),                // &UB
      ST.getPointer(),                // &Stride
      CGF.Builder.getIntN(IVSize, 1), // Incr
      Chunk                           // Chunk
  };
  CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
}

/// \brief Emit the call that tells the runtime a static worksharing loop is
/// finished.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
                         getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
                      Args);
}

/// \brief Notify the runtime that an iteration of an ordered dynamic loop is
/// complete.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
                         getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

/// \brief Fetch the next chunk of a dynamically scheduled loop. Returns the
/// runtime's "more work available" result converted to a boolean.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
      CGF.getContext().BoolTy, Loc);
}

/// \brief Emit the runtime call implementing the 'num_threads' clause.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
                      Args);
}

/// \brief Emit the runtime call implementing the 'proc_bind' clause.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         OpenMPProcBindClauseKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Constants for proc bind value accepted by the runtime.
  enum ProcBindTy {
    ProcBindFalse = 0,
    ProcBindTrue,
    ProcBindMaster,
    ProcBindClose,
    ProcBindSpread,
    ProcBindIntel,
    ProcBindDefault
  } RuntimeProcBind;
  switch (ProcBind) {
  case OMPC_PROC_BIND_master:
    RuntimeProcBind = ProcBindMaster;
    break;
  case OMPC_PROC_BIND_close:
    RuntimeProcBind = ProcBindClose;
    break;
  case OMPC_PROC_BIND_spread:
    RuntimeProcBind = ProcBindSpread;
    break;
  case OMPC_PROC_BIND_unknown:
    llvm_unreachable("Unsupported proc_bind value.");
  }
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}

/// \brief Emit a flush. The variable list is currently ignored; a full
/// __kmpc_flush is emitted regardless.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call void __kmpc_flush(ident_t *loc)
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
                      emitUpdateLocation(CGF, Loc));
}

namespace {
/// \brief Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// \brief List of shared variables.
  KmpTaskTShareds,
  /// \brief Task routine.
  KmpTaskTRoutine,
  /// \brief Partition id for the untied tasks.
  KmpTaskTPartId,
  /// \brief Function with call of destructors for private variables.
  KmpTaskTDestructors,
};
} // anonymous namespace

bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  // FIXME: Add other entries type when they become supported.
  return OffloadEntriesTargetRegion.empty();
}

/// \brief Initialize target region entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned ColNum, unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // Record a placeholder entry (address/ID filled in later by
  // registerTargetRegionEntryInfo).
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr);
  ++OffloadingEntriesNum;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  unsigned ColNum, llvm::Constant *Addr,
                                  llvm::Constant *ID) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                    ColNum) &&
           "Entry must exist.");
    auto &Entry = OffloadEntriesTargetRegion[DeviceID][FileID][ParentName]
                                            [LineNum][ColNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    return;
  } else {
    // On the host a fresh entry is created on the fly, ordered by creation.
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] =
        Entry;
  }
}

bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
    unsigned ColNum) const {
  // Walk the nested maps level by level; absence at any level means the entry
  // does not exist.
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  auto PerColumn = PerLine->second.find(ColNum);
  if (PerColumn == PerLine->second.end())
    return false;
  // Fail if this entry is already registered.
  if (PerColumn->second.getAddress() || PerColumn->second.getID())
    return false;
  return true;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (auto &D : OffloadEntriesTargetRegion)
    for (auto &F : D.second)
      for (auto &P : F.second)
        for (auto &L : P.second)
          for (auto &C : L.second)
            Action(D.first, F.first, P.first(), L.first, C.first, C.second);
}

/// \brief Create a Ctor/Dtor-like function whose body is emitted through
/// \a Codegen. This is used to emit the two functions that register and
/// unregister the descriptor of the current compilation unit.
2059 static llvm::Function * 2060 createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name, 2061 const RegionCodeGenTy &Codegen) { 2062 auto &C = CGM.getContext(); 2063 FunctionArgList Args; 2064 ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(), 2065 /*Id=*/nullptr, C.VoidPtrTy); 2066 Args.push_back(&DummyPtr); 2067 2068 CodeGenFunction CGF(CGM); 2069 GlobalDecl(); 2070 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( 2071 C.VoidTy, Args, FunctionType::ExtInfo(), 2072 /*isVariadic=*/false); 2073 auto FTy = CGM.getTypes().GetFunctionType(FI); 2074 auto *Fn = 2075 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation()); 2076 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation()); 2077 Codegen(CGF); 2078 CGF.FinishFunction(); 2079 return Fn; 2080 } 2081 2082 llvm::Function * 2083 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { 2084 2085 // If we don't have entries or if we are emitting code for the device, we 2086 // don't need to do anything. 2087 if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) 2088 return nullptr; 2089 2090 auto &M = CGM.getModule(); 2091 auto &C = CGM.getContext(); 2092 2093 // Get list of devices we care about 2094 auto &Devices = CGM.getLangOpts().OMPTargetTriples; 2095 2096 // We should be creating an offloading descriptor only if there are devices 2097 // specified. 2098 assert(!Devices.empty() && "No OpenMP offloading devices??"); 2099 2100 // Create the external variables that will point to the begin and end of the 2101 // host entries section. These will be defined by the linker. 
2102 auto *OffloadEntryTy = 2103 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); 2104 llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable( 2105 M, OffloadEntryTy, /*isConstant=*/true, 2106 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 2107 ".omp_offloading.entries_begin"); 2108 llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable( 2109 M, OffloadEntryTy, /*isConstant=*/true, 2110 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 2111 ".omp_offloading.entries_end"); 2112 2113 // Create all device images 2114 llvm::SmallVector<llvm::Constant *, 4> DeviceImagesEntires; 2115 auto *DeviceImageTy = cast<llvm::StructType>( 2116 CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); 2117 2118 for (unsigned i = 0; i < Devices.size(); ++i) { 2119 StringRef T = Devices[i].getTriple(); 2120 auto *ImgBegin = new llvm::GlobalVariable( 2121 M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, 2122 /*Initializer=*/nullptr, 2123 Twine(".omp_offloading.img_start.") + Twine(T)); 2124 auto *ImgEnd = new llvm::GlobalVariable( 2125 M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, 2126 /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T)); 2127 2128 llvm::Constant *Dev = 2129 llvm::ConstantStruct::get(DeviceImageTy, ImgBegin, ImgEnd, 2130 HostEntriesBegin, HostEntriesEnd, nullptr); 2131 DeviceImagesEntires.push_back(Dev); 2132 } 2133 2134 // Create device images global array. 
2135 llvm::ArrayType *DeviceImagesInitTy = 2136 llvm::ArrayType::get(DeviceImageTy, DeviceImagesEntires.size()); 2137 llvm::Constant *DeviceImagesInit = 2138 llvm::ConstantArray::get(DeviceImagesInitTy, DeviceImagesEntires); 2139 2140 llvm::GlobalVariable *DeviceImages = new llvm::GlobalVariable( 2141 M, DeviceImagesInitTy, /*isConstant=*/true, 2142 llvm::GlobalValue::InternalLinkage, DeviceImagesInit, 2143 ".omp_offloading.device_images"); 2144 DeviceImages->setUnnamedAddr(true); 2145 2146 // This is a Zero array to be used in the creation of the constant expressions 2147 llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), 2148 llvm::Constant::getNullValue(CGM.Int32Ty)}; 2149 2150 // Create the target region descriptor. 2151 auto *BinaryDescriptorTy = cast<llvm::StructType>( 2152 CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy())); 2153 llvm::Constant *TargetRegionsDescriptorInit = llvm::ConstantStruct::get( 2154 BinaryDescriptorTy, llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()), 2155 llvm::ConstantExpr::getGetElementPtr(DeviceImagesInitTy, DeviceImages, 2156 Index), 2157 HostEntriesBegin, HostEntriesEnd, nullptr); 2158 2159 auto *Desc = new llvm::GlobalVariable( 2160 M, BinaryDescriptorTy, /*isConstant=*/true, 2161 llvm::GlobalValue::InternalLinkage, TargetRegionsDescriptorInit, 2162 ".omp_offloading.descriptor"); 2163 2164 // Emit code to register or unregister the descriptor at execution 2165 // startup or closing, respectively. 2166 2167 // Create a variable to drive the registration and unregistration of the 2168 // descriptor, so we can reuse the logic that emits Ctors and Dtors. 
2169 auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var"); 2170 ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(), 2171 IdentInfo, C.CharTy); 2172 2173 auto *UnRegFn = createOffloadingBinaryDescriptorFunction( 2174 CGM, ".omp_offloading.descriptor_unreg", [&](CodeGenFunction &CGF) { 2175 CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib), 2176 Desc); 2177 }); 2178 auto *RegFn = createOffloadingBinaryDescriptorFunction( 2179 CGM, ".omp_offloading.descriptor_reg", [&](CodeGenFunction &CGF) { 2180 CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib), 2181 Desc); 2182 CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); 2183 }); 2184 return RegFn; 2185 } 2186 2187 void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *Addr, StringRef Name, 2188 uint64_t Size) { 2189 auto *TgtOffloadEntryType = cast<llvm::StructType>( 2190 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy())); 2191 llvm::LLVMContext &C = CGM.getModule().getContext(); 2192 llvm::Module &M = CGM.getModule(); 2193 2194 // Make sure the address has the right type. 2195 llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(Addr, CGM.VoidPtrTy); 2196 2197 // Create constant string with the name. 2198 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 2199 2200 llvm::GlobalVariable *Str = 2201 new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true, 2202 llvm::GlobalValue::InternalLinkage, StrPtrInit, 2203 ".omp_offloading.entry_name"); 2204 Str->setUnnamedAddr(true); 2205 llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy); 2206 2207 // Create the entry struct. 
2208 llvm::Constant *EntryInit = llvm::ConstantStruct::get( 2209 TgtOffloadEntryType, AddrPtr, StrPtr, 2210 llvm::ConstantInt::get(CGM.SizeTy, Size), nullptr); 2211 llvm::GlobalVariable *Entry = new llvm::GlobalVariable( 2212 M, TgtOffloadEntryType, true, llvm::GlobalValue::ExternalLinkage, 2213 EntryInit, ".omp_offloading.entry"); 2214 2215 // The entry has to be created in the section the linker expects it to be. 2216 Entry->setSection(".omp_offloading.entries"); 2217 // We can't have any padding between symbols, so we need to have 1-byte 2218 // alignment. 2219 Entry->setAlignment(1); 2220 return; 2221 } 2222 2223 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 2224 // Emit the offloading entries and metadata so that the device codegen side 2225 // can 2226 // easily figure out what to emit. The produced metadata looks like this: 2227 // 2228 // !omp_offload.info = !{!1, ...} 2229 // 2230 // Right now we only generate metadata for function that contain target 2231 // regions. 2232 2233 // If we do not have entries, we dont need to do anything. 2234 if (OffloadEntriesInfoManager.empty()) 2235 return; 2236 2237 llvm::Module &M = CGM.getModule(); 2238 llvm::LLVMContext &C = M.getContext(); 2239 SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> 2240 OrderedEntries(OffloadEntriesInfoManager.size()); 2241 2242 // Create the offloading info metadata node. 2243 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 2244 2245 // Auxiliar methods to create metadata values and strings. 
  // Wrap a 32-bit integer as constant metadata.
  auto getMDInt = [&](unsigned v) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
  };

  auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter = [&](
      unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
      unsigned Column,
      OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
    llvm::SmallVector<llvm::Metadata *, 32> Ops;
    // Generate metadata for target regions. Each entry of this metadata
    // contains:
    // - Entry 0 -> Kind of this type of metadata (0).
    // - Entry 1 -> Device ID of the file where the entry was identified.
    // - Entry 2 -> File ID of the file where the entry was identified.
    // - Entry 3 -> Mangled name of the function where the entry was
    // identified.
    // - Entry 4 -> Line in the file where the entry was identified.
    // - Entry 5 -> Column in the file where the entry was identified.
    // - Entry 6 -> Order the entry was created.
    // The first element of the metadata node is the kind.
    Ops.push_back(getMDInt(E.getKind()));
    Ops.push_back(getMDInt(DeviceID));
    Ops.push_back(getMDInt(FileID));
    Ops.push_back(getMDString(ParentName));
    Ops.push_back(getMDInt(Line));
    Ops.push_back(getMDInt(Column));
    Ops.push_back(getMDInt(E.getOrder()));

    // Save this entry in the right position of the ordered entries array.
    OrderedEntries[E.getOrder()] = &E;

    // Add metadata to the named metadata node.
    MD->addOperand(llvm::MDNode::get(C, Ops));
  };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Now emit the actual entry globals, in creation order.
  for (auto *E : OrderedEntries) {
    assert(E && "All ordered entries must exist!");
    if (auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                E)) {
      assert(CE->getID() && CE->getAddress() &&
             "Entry ID and Addr are invalid!");
      createOffloadEntry(CE->getID(), CE->getAddress()->getName(), /*Size=*/0);
    } else
      llvm_unreachable("Unsupported entry kind.");
  }
}

/// \brief Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in
  // createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  // Any failure to read/parse the host IR is silently ignored; the device
  // side then simply has no pre-initialized entries.
  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (Buf.getError())
    return;

  llvm::LLVMContext C;
  auto ME = llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C);

  if (ME.getError())
    return;

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (auto I : MD->operands()) {
    llvm::MDNode *MN = cast<llvm::MDNode>(I);

    // Decode the operands produced by TargetRegionMetadataEmitter above.
    auto getMDInt = [&](unsigned Idx) {
      llvm::ConstantAsMetadata *V =
          cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto getMDString = [&](unsigned Idx) {
      llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    switch (getMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OFFLOAD_ENTRY_INFO_TARGET_REGION:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
          /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
          /*Column=*/getMDInt(5), /*Order=*/getMDInt(6));
      break;
    }
  }
}

/// \brief Lazily build the LLVM type for kmp_routine_entry_t, the task entry
/// function pointer type: kmp_int32 (*)(kmp_int32, void *).
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    auto &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

/// \brief Append an unnamed public field of type \p FieldTy to record \p DC.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {

  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void   *addr;   // Pointer to the offload entry info.
  //                   // (function or global)
  //   char   *name;   // Name of the function or global.
  //   size_t  size;   // Size of the entry info (0 if it is a function).
2387 // }; 2388 if (TgtOffloadEntryQTy.isNull()) { 2389 ASTContext &C = CGM.getContext(); 2390 auto *RD = C.buildImplicitRecord("__tgt_offload_entry"); 2391 RD->startDefinition(); 2392 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 2393 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 2394 addFieldToRecordDecl(C, RD, C.getSizeType()); 2395 RD->completeDefinition(); 2396 TgtOffloadEntryQTy = C.getRecordType(RD); 2397 } 2398 return TgtOffloadEntryQTy; 2399 } 2400 2401 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { 2402 // These are the types we need to build: 2403 // struct __tgt_device_image{ 2404 // void *ImageStart; // Pointer to the target code start. 2405 // void *ImageEnd; // Pointer to the target code end. 2406 // // We also add the host entries to the device image, as it may be useful 2407 // // for the target runtime to have access to that information. 2408 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all 2409 // // the entries. 2410 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 2411 // // entries (non inclusive). 2412 // }; 2413 if (TgtDeviceImageQTy.isNull()) { 2414 ASTContext &C = CGM.getContext(); 2415 auto *RD = C.buildImplicitRecord("__tgt_device_image"); 2416 RD->startDefinition(); 2417 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 2418 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 2419 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 2420 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 2421 RD->completeDefinition(); 2422 TgtDeviceImageQTy = C.getRecordType(RD); 2423 } 2424 return TgtDeviceImageQTy; 2425 } 2426 2427 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { 2428 // struct __tgt_bin_desc{ 2429 // int32_t NumDevices; // Number of devices supported. 2430 // __tgt_device_image *DeviceImages; // Arrays of device images 2431 // // (one per device). 2432 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the 2433 // // entries. 
  //   __tgt_offload_entry *EntriesEnd;    // End of the table with all the
  //                                       // entries (non inclusive).
  // };
  // Built lazily and cached in TgtBinaryDescriptorQTy.
  if (TgtBinaryDescriptorQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
    RD->startDefinition();
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtBinaryDescriptorQTy = C.getRecordType(RD);
  }
  return TgtBinaryDescriptorQTy;
}

namespace {
/// \brief Triple describing one task private variable: the variable as
/// captured in the original scope, its private copy inside the task, and (for
/// firstprivates only) the helper used to reference the element being
/// initialized; PrivateElemInit is null for plain privates.
struct PrivateHelpersTy {
  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
                   const VarDecl *PrivateElemInit)
      : Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  const VarDecl *Original;
  const VarDecl *PrivateCopy;
  const VarDecl *PrivateElemInit;
};
// The CharUnits key is the declared alignment; emitTaskCall sorts on it.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

/// \brief Build the implicit record holding all task private copies, one field
/// per private (in the order of \p Privates). Returns null when there are no
/// privates at all.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    auto &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /* private vars */
    //       };
    auto *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (auto &&Pair : Privates) {
      auto *VD = Pair.second.Original;
      // Reference-typed captures are stored by value inside the task.
      auto Type = VD->getType();
      Type = Type.getNonReferenceType();
      auto *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        // Propagate any explicit alignment attributes from the original
        // variable onto the generated field.
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

/// \brief Build the implicit record for the runtime's kmp_task_t descriptor.
/// Field order must match the runtime's layout (shareds, routine, part_id,
/// destructors) — the KmpTaskT* field-index enums rely on it.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  auto &C = CGM.getContext();
  // Build struct kmp_task_t {
  //   void *              shareds;
  //   kmp_routine_entry_t routine;
  //   kmp_int32           part_id;
  //   kmp_routine_entry_t destructors;
  // };
  auto *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  RD->completeDefinition();
  return RD;
}

/// \brief Build the per-directive task record: kmp_task_t followed by the
/// (optional) privates struct for this particular task.
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  auto &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //   kmp_task_t task_data;
  //   .kmp_privates_t.
  //   privates;
  // };
  auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  // The privates field is only added when the task actually has privates.
  if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  }
  RD->completeDefinition();
  return RD;
}

/// \brief Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
///   tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Value *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Value *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  auto &C = CGM.getContext();
  // Signature: kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
                                /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict());
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  FunctionType::ExtInfo Info;
  auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
                                                    /*isVariadic=*/false);
  auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  auto *TaskEntry =
      llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_entry.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);

  // TaskFunction(gtid,
  //              tt->task_data.part_id, &tt->privates, task_privates_map,
  //              tt->task_data.shareds);
  auto *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // tt, dereferenced: the kmp_task_t_with_privates record.
  LValue TDBase = emitLoadOfPointerLValue(
      CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
  auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // First field is the embedded kmp_task_t (task_data).
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  // shareds is stored as void*; cast it to the directive's shareds type.
  auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field (index 1) only exists when the task has privates;
  // otherwise pass a null pointer.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
                             TaskPrivatesMap, SharedsParam};
  CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
  // The proxy always returns 0 to the runtime.
  CGF.EmitStoreThroughLValue(
      RValue::get(CGF.Builder.getInt32(/*C=*/0)),
      CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// \brief Emit the .omp_task_destructor. helper with the same signature as the
/// task entry (kmp_int32(kmp_int32, kmp_task_t_with_privates*)); it pushes a
/// destroy cleanup for every privates field whose type needs destruction.
static
llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                     SourceLocation Loc,
                                     QualType KmpInt32Ty,
                                     QualType KmpTaskTWithPrivatesPtrQTy,
                                     QualType KmpTaskTWithPrivatesQTy) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
                                /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict());
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  FunctionType::ExtInfo Info;
  auto &DestructorFnInfo =
      CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
                                                    /*isVariadic=*/false);
  auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_destructor.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
                                    DestructorFnInfo);
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args);

  LValue Base = emitLoadOfPointerLValue(
      CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
  auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Second field of kmp_task_t_with_privates is the privates struct.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    // Only fields with non-trivial destruction get a cleanup.
    if (auto DtorKind = Field->getType().isDestructedType()) {
      auto FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// \brief Emit a privates mapping function for correct handling of private and
/// firstprivate
/// variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               ArrayRef<const Expr *> PrivateVars,
                               ArrayRef<const Expr *> FirstprivateVars,
                               QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  // Arg 0: pointer to the .kmp_privates.t struct inside the task.
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict());
  Args.push_back(&TaskPrivatesArg);
  // Map each private VarDecl to its out-parameter position in Args.
  // Counter starts at 1 because position 0 is TaskPrivatesArg.
  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (auto *E : PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc,
        /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
                            .withConst()
                            .withRestrict()));
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (auto *E : FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc,
        /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
                            .withConst()
                            .withRestrict()));
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  FunctionType::ExtInfo Info;
  auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info,
                                                    /*isVariadic=*/false);
  auto *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
      ".omp_task_privates_map.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  // The mapping function is tiny; force inlining into the task entry.
  TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args);

  // *privi = &.privates.privi;
  LValue Base = emitLoadOfPointerLValue(
      CGF, CGF.GetAddrOfLocalVar(&TaskPrivatesArg), TaskPrivatesArg.getType());
  auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  // Fields of .kmp_privates.t are in Privates order; look up the matching
  // out-parameter via PrivateVarsPos and store the field address through it.
  Counter = 0;
  for (auto *Field : PrivatesQTyRD->fields()) {
    auto FieldLVal = CGF.EmitLValueForField(Base, Field);
    auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    auto RefLoadLVal =
        emitLoadOfPointerLValue(CGF, RefLVal.getAddress(), RefLVal.getType());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// \brief Comparator for llvm::array_pod_sort over PrivateDataTy. Returns 1
/// when P1's alignment is smaller, so privates end up sorted by *descending*
/// alignment (largest-aligned fields first in the privates record).
static int array_pod_sort_comparator(const PrivateDataTy *P1,
                                     const PrivateDataTy *P2) {
  return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
}

void CGOpenMPRuntime::emitTaskCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
    bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
    llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
    ArrayRef<const Expr *> PrivateCopies,
    ArrayRef<const Expr *> FirstprivateVars,
    ArrayRef<const Expr *> FirstprivateCopies,
    ArrayRef<const Expr *> FirstprivateInits,
    ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) {
  if (!CGF.HaveInsertPoint())
    return;
  auto &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 8> Privates;
  // Aggregate privates and sort them by the alignment.
  // PrivateVars/PrivateCopies and FirstprivateVars/Copies/Inits are parallel
  // arrays; walk them in lockstep.
  auto I = PrivateCopies.begin();
  for (auto *E : PrivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  I = FirstprivateCopies.begin();
  auto IElemInitRef = FirstprivateInits.begin();
  for (auto *E : FirstprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
    ++I, ++IElemInitRef;
  }
  // Descending-alignment order (see array_pod_sort_comparator).
  llvm::array_pod_sort(Privates.begin(), Privates.end(),
                       array_pod_sort_comparator);
  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (KmpTaskTQTy.isNull()) {
    KmpTaskTQTy = C.getRecordType(
        createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy));
  }
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  auto *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
  auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  // The expected type of the privates-map parameter is taken from the 4th
  // parameter of the outlined task function.
  llvm::Value *TaskPrivatesMap = nullptr;
  auto *TaskPrivatesMapTy =
      std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
                3)
          ->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  auto *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy,
      KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  const unsigned TiedFlag = 0x1;
  const unsigned FinalFlag = 0x2;
  unsigned Flags = Tied ? TiedFlag : 0;
  // final(expr): if the condition is a runtime value, select the flag with an
  // IR select; otherwise fold it from the compile-time boolean.
  auto *TaskFlags =
      Final.getPointer()
          ? CGF.Builder.CreateSelect(Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  auto *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
  }
  // Emit initial values for private copies (if any).
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    auto PrivatesBase = CGF.EmitLValueForField(Base, *FI);
    // Walk the fields of the privates struct in parallel with Privates.
    FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
    LValue SharedsBase;
    if (!FirstprivateVars.empty()) {
      SharedsBase = CGF.MakeAddrLValue(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
          SharedsTy);
    }
    CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
        cast<CapturedStmt>(*D.getAssociatedStmt()));
    for (auto &&Pair : Privates) {
      auto *VD = Pair.second.PrivateCopy;
      auto *Init = VD->getAnyInitializer();
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (Init) {
        // Non-null PrivateElemInit marks a firstprivate: initialize the copy
        // from the captured shared value instead of default-initializing.
        if (auto *Elem = Pair.second.PrivateElemInit) {
          auto *OriginalVD = Pair.second.Original;
          auto *SharedField = CapturesInfo.lookup(OriginalVD);
          auto SharedRefLValue =
              CGF.EmitLValueForField(SharedsBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), AlignmentSource::Decl);
          QualType Type = OriginalVD->getType();
          if (Type->isArrayType()) {
            // Initialize firstprivate array.
            if (!isa<CXXConstructExpr>(Init) ||
                CGF.isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
                                      SharedRefLValue.getAddress(), Type);
            } else {
              // Initialize firstprivate array using element-by-element
              // intialization.
              CGF.EmitOMPAggregateAssign(
                  PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
                  Type, [&CGF, Elem, Init, &CapturesInfo](
                            Address DestElement, Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    CodeGenFunction::OMPPrivateScope InitScope(CGF);
                    // Bind the element-init helper to the source element so
                    // Init references the shared array's element.
                    InitScope.addPrivate(Elem, [SrcElement]() -> Address {
                      return SrcElement;
                    });
                    (void)InitScope.Privatize();
                    // Emit initialization for single element.
                    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                        CGF, &CapturesInfo);
                    CGF.EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer=*/false);
                  });
            }
          } else {
            // Scalar/record firstprivate: initialize from the shared copy.
            CodeGenFunction::OMPPrivateScope InitScope(CGF);
            InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
              return SharedRefLValue.getAddress();
            });
            (void)InitScope.Privatize();
            CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
            CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                               /*capturedByInit=*/false);
          }
        } else {
          // Plain private with an initializer (e.g. default constructor).
          CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
        }
      }
      // A destructor function is required if any private needs destruction.
      NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
      ++FI;
    }
  }
  // Provide pointer to function with destructors for privates.
  llvm::Value *DestructorFn =
      NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
                                             KmpTaskTWithPrivatesPtrQTy,
                                             KmpTaskTWithPrivatesQTy)
                   : llvm::ConstantPointerNull::get(
                         cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
  LValue Destructor = CGF.EmitLValueForField(
      TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
  CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                            DestructorFn, KmpRoutineEntryPtrTy),
                        Destructor);

  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    // flags is a bool-sized unsigned integer.
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    if (KmpDependInfoTy.isNull()) {
      // Lazily build struct kmp_depend_info { intptr base_addr; size_t len;
      // flags; } and cache it.
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy);
    for (unsigned i = 0; i < NumDependencies; ++i) {
      const Expr *E = Dependences[i].second;
      auto Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: length = (&section_end + 1) - &section_begin,
        // computed in bytes via ptrtoint.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else
        Size = CGF.getTypeSize(Ty);
      auto Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      auto BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      auto LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Dependences[i].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      auto FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // Decay the array to a void* pointer to its first element for the RTL.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
        CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
  // *new_task);
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  auto *ThreadID = getThreadID(CGF, Loc);
  auto *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // if(<cond>) true branch (or unconditional case): enqueue the task through
  // the runtime, with or without dependences.
  auto &&ThenCodeGen = [this, NumDependencies,
                        &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) {
    // TODO: add check for untied tasks.
    if (NumDependencies) {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps),
                          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
  };
  typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value>
      IfCallEndCleanup;

  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // if(<cond>) false branch: execute the task body immediately ("undeferred"),
  // bracketed by task_begin_if0/task_complete_if0 runtime calls.
  auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0),
                        TaskArgs);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Pushed as a cleanup so complete_if0 also runs on the EH path.
    CGF.EHStack.pushCleanup<IfCallEndCleanup>(
        NormalAndEHCleanup,
        createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
        llvm::makeArrayRef(TaskArgs));

    // Call proxy_task_entry(gtid, new_task);
    llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
    CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
  };

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    ThenCodeGen(CGF);
  }
}

/// \brief Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  auto RHSBegin = RHSAddr.getPointer();
  auto LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the body entirely for a zero-length section.
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current elements so RedOpGen's
  // expressions operate element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// \brief Emit the internal .omp.reduction.reduction_func that combines each
/// thread's private reduction values (rhs) into the master copies (lhs):
/// void reduction_func(void *LHSArg, void *RHSArg), where both args point to
/// arrays of pointers to the reduction variables.
static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
                                          llvm::Type *ArgsType,
                                          ArrayRef<const Expr *> Privates,
                                          ArrayRef<const Expr *> LHSExprs,
                                          ArrayRef<const Expr *> RHSExprs,
                                          ArrayRef<const Expr *> ReductionOps) {
  auto &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  FunctionType::ExtInfo EI;
  auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
      C.VoidTy, Args, EI, /*isVariadic=*/false);
  auto *Fn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
      ".omp.reduction.reduction_func", &CGM.getModule());
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); 3211 CodeGenFunction CGF(CGM); 3212 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 3213 3214 // Dst = (void*[n])(LHSArg); 3215 // Src = (void*[n])(RHSArg); 3216 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3217 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 3218 ArgsType), CGF.getPointerAlign()); 3219 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3220 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 3221 ArgsType), CGF.getPointerAlign()); 3222 3223 // ... 3224 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 3225 // ... 3226 CodeGenFunction::OMPPrivateScope Scope(CGF); 3227 auto IPriv = Privates.begin(); 3228 unsigned Idx = 0; 3229 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 3230 auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 3231 Scope.addPrivate(RHSVar, [&]() -> Address { 3232 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 3233 }); 3234 auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 3235 Scope.addPrivate(LHSVar, [&]() -> Address { 3236 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 3237 }); 3238 QualType PrivTy = (*IPriv)->getType(); 3239 if (PrivTy->isVariablyModifiedType()) { 3240 // Get array size and emit VLA type. 
3241 ++Idx; 3242 Address Elem = 3243 CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize()); 3244 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 3245 auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy); 3246 auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 3247 CodeGenFunction::OpaqueValueMapping OpaqueMap( 3248 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 3249 CGF.EmitVariablyModifiedType(PrivTy); 3250 } 3251 } 3252 Scope.Privatize(); 3253 IPriv = Privates.begin(); 3254 auto ILHS = LHSExprs.begin(); 3255 auto IRHS = RHSExprs.begin(); 3256 for (auto *E : ReductionOps) { 3257 if ((*IPriv)->getType()->isArrayType()) { 3258 // Emit reduction for array section. 3259 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 3260 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 3261 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 3262 [=](CodeGenFunction &CGF, const Expr *, 3263 const Expr *, 3264 const Expr *) { CGF.EmitIgnoredExpr(E); }); 3265 } else 3266 // Emit reduction for array subscript or single variable. 3267 CGF.EmitIgnoredExpr(E); 3268 ++IPriv, ++ILHS, ++IRHS; 3269 } 3270 Scope.ForceCleanup(); 3271 CGF.FinishFunction(); 3272 return Fn; 3273 } 3274 3275 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 3276 ArrayRef<const Expr *> Privates, 3277 ArrayRef<const Expr *> LHSExprs, 3278 ArrayRef<const Expr *> RHSExprs, 3279 ArrayRef<const Expr *> ReductionOps, 3280 bool WithNowait, bool SimpleReduction) { 3281 if (!CGF.HaveInsertPoint()) 3282 return; 3283 // Next code should be emitted for reduction: 3284 // 3285 // static kmp_critical_name lock = { 0 }; 3286 // 3287 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 3288 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 3289 // ... 
3290 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 3291 // *(Type<n>-1*)rhs[<n>-1]); 3292 // } 3293 // 3294 // ... 3295 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 3296 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 3297 // RedList, reduce_func, &<lock>)) { 3298 // case 1: 3299 // ... 3300 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 3301 // ... 3302 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 3303 // break; 3304 // case 2: 3305 // ... 3306 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 3307 // ... 3308 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 3309 // break; 3310 // default:; 3311 // } 3312 // 3313 // if SimpleReduction is true, only the next code is generated: 3314 // ... 3315 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 3316 // ... 3317 3318 auto &C = CGM.getContext(); 3319 3320 if (SimpleReduction) { 3321 CodeGenFunction::RunCleanupsScope Scope(CGF); 3322 auto IPriv = Privates.begin(); 3323 auto ILHS = LHSExprs.begin(); 3324 auto IRHS = RHSExprs.begin(); 3325 for (auto *E : ReductionOps) { 3326 if ((*IPriv)->getType()->isArrayType()) { 3327 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 3328 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 3329 EmitOMPAggregateReduction( 3330 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 3331 [=](CodeGenFunction &CGF, const Expr *, const Expr *, 3332 const Expr *) { CGF.EmitIgnoredExpr(E); }); 3333 } else 3334 CGF.EmitIgnoredExpr(E); 3335 ++IPriv, ++ILHS, ++IRHS; 3336 } 3337 return; 3338 } 3339 3340 // 1. Build a list of reduction variables. 3341 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 3342 auto Size = RHSExprs.size(); 3343 for (auto *E : Privates) { 3344 if (E->getType()->isVariablyModifiedType()) 3345 // Reserve place for array size. 
3346 ++Size; 3347 } 3348 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 3349 QualType ReductionArrayTy = 3350 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 3351 /*IndexTypeQuals=*/0); 3352 Address ReductionList = 3353 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 3354 auto IPriv = Privates.begin(); 3355 unsigned Idx = 0; 3356 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 3357 Address Elem = 3358 CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize()); 3359 CGF.Builder.CreateStore( 3360 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3361 CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), 3362 Elem); 3363 if ((*IPriv)->getType()->isVariablyModifiedType()) { 3364 // Store array size. 3365 ++Idx; 3366 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, 3367 CGF.getPointerSize()); 3368 llvm::Value *Size = CGF.Builder.CreateIntCast( 3369 CGF.getVLASize( 3370 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 3371 .first, 3372 CGF.SizeTy, /*isSigned=*/false); 3373 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 3374 Elem); 3375 } 3376 } 3377 3378 // 2. Emit reduce_func(). 3379 auto *ReductionFn = emitReductionFunction( 3380 CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 3381 LHSExprs, RHSExprs, ReductionOps); 3382 3383 // 3. Create static kmp_critical_name lock = { 0 }; 3384 auto *Lock = getCriticalRegionLock(".reduction"); 3385 3386 // 4. 
Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 3387 // RedList, reduce_func, &<lock>); 3388 auto *IdentTLoc = emitUpdateLocation( 3389 CGF, Loc, 3390 static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE)); 3391 auto *ThreadId = getThreadID(CGF, Loc); 3392 auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 3393 auto *RL = 3394 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(), 3395 CGF.VoidPtrTy); 3396 llvm::Value *Args[] = { 3397 IdentTLoc, // ident_t *<loc> 3398 ThreadId, // i32 <gtid> 3399 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 3400 ReductionArrayTySize, // size_type sizeof(RedList) 3401 RL, // void *RedList 3402 ReductionFn, // void (*) (void *, void *) <reduce_func> 3403 Lock // kmp_critical_name *&<lock> 3404 }; 3405 auto Res = CGF.EmitRuntimeCall( 3406 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 3407 : OMPRTL__kmpc_reduce), 3408 Args); 3409 3410 // 5. Build switch(res) 3411 auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 3412 auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 3413 3414 // 6. Build case 1: 3415 // ... 3416 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 3417 // ... 3418 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 3419 // break; 3420 auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 3421 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 3422 CGF.EmitBlock(Case1BB); 3423 3424 { 3425 CodeGenFunction::RunCleanupsScope Scope(CGF); 3426 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 3427 llvm::Value *EndArgs[] = { 3428 IdentTLoc, // ident_t *<loc> 3429 ThreadId, // i32 <gtid> 3430 Lock // kmp_critical_name *&<lock> 3431 }; 3432 CGF.EHStack 3433 .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>( 3434 NormalAndEHCleanup, 3435 createRuntimeFunction(WithNowait ? 
OMPRTL__kmpc_end_reduce_nowait 3436 : OMPRTL__kmpc_end_reduce), 3437 llvm::makeArrayRef(EndArgs)); 3438 auto IPriv = Privates.begin(); 3439 auto ILHS = LHSExprs.begin(); 3440 auto IRHS = RHSExprs.begin(); 3441 for (auto *E : ReductionOps) { 3442 if ((*IPriv)->getType()->isArrayType()) { 3443 // Emit reduction for array section. 3444 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 3445 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 3446 EmitOMPAggregateReduction( 3447 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 3448 [=](CodeGenFunction &CGF, const Expr *, const Expr *, 3449 const Expr *) { CGF.EmitIgnoredExpr(E); }); 3450 } else 3451 // Emit reduction for array subscript or single variable. 3452 CGF.EmitIgnoredExpr(E); 3453 ++IPriv, ++ILHS, ++IRHS; 3454 } 3455 } 3456 3457 CGF.EmitBranch(DefaultBB); 3458 3459 // 7. Build case 2: 3460 // ... 3461 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 3462 // ... 3463 // break; 3464 auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 3465 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 3466 CGF.EmitBlock(Case2BB); 3467 3468 { 3469 CodeGenFunction::RunCleanupsScope Scope(CGF); 3470 if (!WithNowait) { 3471 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 3472 llvm::Value *EndArgs[] = { 3473 IdentTLoc, // ident_t *<loc> 3474 ThreadId, // i32 <gtid> 3475 Lock // kmp_critical_name *&<lock> 3476 }; 3477 CGF.EHStack 3478 .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>( 3479 NormalAndEHCleanup, 3480 createRuntimeFunction(OMPRTL__kmpc_end_reduce), 3481 llvm::makeArrayRef(EndArgs)); 3482 } 3483 auto ILHS = LHSExprs.begin(); 3484 auto IRHS = RHSExprs.begin(); 3485 auto IPriv = Privates.begin(); 3486 for (auto *E : ReductionOps) { 3487 const Expr *XExpr = nullptr; 3488 const Expr *EExpr = nullptr; 3489 const Expr *UpExpr = nullptr; 3490 BinaryOperatorKind BO = BO_Comma; 3491 if (auto *BO = dyn_cast<BinaryOperator>(E)) { 3492 if 
(BO->getOpcode() == BO_Assign) { 3493 XExpr = BO->getLHS(); 3494 UpExpr = BO->getRHS(); 3495 } 3496 } 3497 // Try to emit update expression as a simple atomic. 3498 auto *RHSExpr = UpExpr; 3499 if (RHSExpr) { 3500 // Analyze RHS part of the whole expression. 3501 if (auto *ACO = dyn_cast<AbstractConditionalOperator>( 3502 RHSExpr->IgnoreParenImpCasts())) { 3503 // If this is a conditional operator, analyze its condition for 3504 // min/max reduction operator. 3505 RHSExpr = ACO->getCond(); 3506 } 3507 if (auto *BORHS = 3508 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 3509 EExpr = BORHS->getRHS(); 3510 BO = BORHS->getOpcode(); 3511 } 3512 } 3513 if (XExpr) { 3514 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 3515 auto &&AtomicRedGen = [this, BO, VD, IPriv, 3516 Loc](CodeGenFunction &CGF, const Expr *XExpr, 3517 const Expr *EExpr, const Expr *UpExpr) { 3518 LValue X = CGF.EmitLValue(XExpr); 3519 RValue E; 3520 if (EExpr) 3521 E = CGF.EmitAnyExpr(EExpr); 3522 CGF.EmitOMPAtomicSimpleUpdateExpr( 3523 X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc, 3524 [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) { 3525 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 3526 PrivateScope.addPrivate( 3527 VD, [&CGF, VD, XRValue, Loc]() -> Address { 3528 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 3529 CGF.emitOMPSimpleStore( 3530 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 3531 VD->getType().getNonReferenceType(), Loc); 3532 return LHSTemp; 3533 }); 3534 (void)PrivateScope.Privatize(); 3535 return CGF.EmitAnyExpr(UpExpr); 3536 }); 3537 }; 3538 if ((*IPriv)->getType()->isArrayType()) { 3539 // Emit atomic reduction for array section. 3540 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 3541 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 3542 AtomicRedGen, XExpr, EExpr, UpExpr); 3543 } else 3544 // Emit atomic reduction for array subscript or single variable. 
3545 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 3546 } else { 3547 // Emit as a critical region. 3548 auto &&CritRedGen = [this, E, Loc](CodeGenFunction &CGF, const Expr *, 3549 const Expr *, const Expr *) { 3550 emitCriticalRegion( 3551 CGF, ".atomic_reduction", 3552 [E](CodeGenFunction &CGF) { CGF.EmitIgnoredExpr(E); }, Loc); 3553 }; 3554 if ((*IPriv)->getType()->isArrayType()) { 3555 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 3556 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 3557 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 3558 CritRedGen); 3559 } else 3560 CritRedGen(CGF, nullptr, nullptr, nullptr); 3561 } 3562 ++ILHS, ++IRHS, ++IPriv; 3563 } 3564 } 3565 3566 CGF.EmitBranch(DefaultBB); 3567 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 3568 } 3569 3570 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 3571 SourceLocation Loc) { 3572 if (!CGF.HaveInsertPoint()) 3573 return; 3574 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 3575 // global_tid); 3576 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3577 // Ignore return result until untied tasks are supported. 
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
}

/// \brief Emit a directive that needs no outlining ('for', 'sections',
/// 'atomic', ...): install an inlined-region RAII and emit the body in place.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Values passed as the 'cncl_kind' argument of the __kmpc_cancel* runtime
/// calls below; must stay in sync with the OpenMP runtime library.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
}

/// \brief Map an OpenMP cancellation region kind to the runtime's
/// RTCancelKind value. Asserts on any region other than parallel/for/
/// sections/taskgroup.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

/// \brief Emit a '#pragma omp cancellation point': call
/// __kmpc_cancellationpoint and, if it returns non-zero, emit a cancel
/// barrier and branch out of the enclosing construct. Emits nothing unless
/// the innermost region was generated with cancellation support.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      auto *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //  __kmpc_cancel_barrier();
      //   exit from construct;
      // }
      auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
      auto *ContBB = CGF.createBasicBlock(".cancel.continue");
      auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // __kmpc_cancel_barrier();
      emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      auto CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

/// \brief Emit a '#pragma omp cancel': call __kmpc_cancel (guarded by the
/// 'if' clause condition when present) and, if it returns non-zero, emit a
/// cancel barrier and branch out of the enclosing construct.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      auto *Result =
          CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //  __kmpc_cancel_barrier();
      //   exit from construct;
      // }
      auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
      auto *ContBB = CGF.createBasicBlock(".cancel.continue");
      auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // __kmpc_cancel_barrier();
      emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      auto CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond)
      emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &) {});
    else
      ThenGen(CGF);
  }
}

/// \brief Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line and column numbers
/// associated with the relevant entry source location.
3696 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 3697 unsigned &DeviceID, unsigned &FileID, 3698 unsigned &LineNum, unsigned &ColumnNum) { 3699 3700 auto &SM = C.getSourceManager(); 3701 3702 // The loc should be always valid and have a file ID (the user cannot use 3703 // #pragma directives in macros) 3704 3705 assert(Loc.isValid() && "Source location is expected to be always valid."); 3706 assert(Loc.isFileID() && "Source location is expected to refer to a file."); 3707 3708 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 3709 assert(PLoc.isValid() && "Source location is expected to be always valid."); 3710 3711 llvm::sys::fs::UniqueID ID; 3712 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 3713 llvm_unreachable("Source file with target region no longer exists!"); 3714 3715 DeviceID = ID.getDevice(); 3716 FileID = ID.getFile(); 3717 LineNum = PLoc.getLine(); 3718 ColumnNum = PLoc.getColumn(); 3719 return; 3720 } 3721 3722 void CGOpenMPRuntime::emitTargetOutlinedFunction( 3723 const OMPExecutableDirective &D, StringRef ParentName, 3724 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 3725 bool IsOffloadEntry) { 3726 3727 assert(!ParentName.empty() && "Invalid target region parent name!"); 3728 3729 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 3730 3731 // Emit target region as a standalone region. 3732 auto &&CodeGen = [&CS](CodeGenFunction &CGF) { 3733 CGF.EmitStmt(CS.getCapturedStmt()); 3734 }; 3735 3736 // Create a unique name for the proxy/entry function that using the source 3737 // location information of the current target region. The name will be 3738 // something like: 3739 // 3740 // .omp_offloading.DD_FFFF.PP.lBB.cCC 3741 // 3742 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 3743 // mangled name of the function that encloses the target region, BB is the 3744 // line number of the target region, and CC is the column number of the target 3745 // region. 
3746 3747 unsigned DeviceID; 3748 unsigned FileID; 3749 unsigned Line; 3750 unsigned Column; 3751 getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID, 3752 Line, Column); 3753 SmallString<64> EntryFnName; 3754 { 3755 llvm::raw_svector_ostream OS(EntryFnName); 3756 OS << ".omp_offloading" << llvm::format(".%x", DeviceID) 3757 << llvm::format(".%x.", FileID) << ParentName << ".l" << Line << ".c" 3758 << Column; 3759 } 3760 3761 CodeGenFunction CGF(CGM, true); 3762 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 3763 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 3764 3765 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); 3766 3767 // If this target outline function is not an offload entry, we don't need to 3768 // register it. 3769 if (!IsOffloadEntry) 3770 return; 3771 3772 // The target region ID is used by the runtime library to identify the current 3773 // target region, so it only has to be unique and not necessarily point to 3774 // anything. It could be the pointer to the outlined function that implements 3775 // the target region, but we aren't using that so that the compiler doesn't 3776 // need to keep that, and could therefore inline the host function if proven 3777 // worthwhile during optimization. In the other hand, if emitting code for the 3778 // device, the ID has to be the function address so that it can retrieved from 3779 // the offloading entry and launched by the runtime library. We also mark the 3780 // outlined function to have external linkage in case we are emitting code for 3781 // the device, because these functions will be entry points to the device. 
3782 3783 if (CGM.getLangOpts().OpenMPIsDevice) { 3784 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 3785 OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage); 3786 } else 3787 OutlinedFnID = new llvm::GlobalVariable( 3788 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 3789 llvm::GlobalValue::PrivateLinkage, 3790 llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id"); 3791 3792 // Register the information for the entry associated with this target region. 3793 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 3794 DeviceID, FileID, ParentName, Line, Column, OutlinedFn, OutlinedFnID); 3795 return; 3796 } 3797 3798 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, 3799 const OMPExecutableDirective &D, 3800 llvm::Value *OutlinedFn, 3801 llvm::Value *OutlinedFnID, 3802 const Expr *IfCond, const Expr *Device, 3803 ArrayRef<llvm::Value *> CapturedVars) { 3804 if (!CGF.HaveInsertPoint()) 3805 return; 3806 /// \brief Values for bit flags used to specify the mapping type for 3807 /// offloading. 3808 enum OpenMPOffloadMappingFlags { 3809 /// \brief Allocate memory on the device and move data from host to device. 3810 OMP_MAP_TO = 0x01, 3811 /// \brief Allocate memory on the device and move data from device to host. 3812 OMP_MAP_FROM = 0x02, 3813 /// \brief The element passed to the device is a pointer. 3814 OMP_MAP_PTR = 0x20, 3815 /// \brief Pass the element to the device by value. 3816 OMP_MAP_BYCOPY = 0x80, 3817 }; 3818 3819 enum OpenMPOffloadingReservedDeviceIDs { 3820 /// \brief Device ID if the device was not defined, runtime should get it 3821 /// from environment variables in the spec. 3822 OMP_DEVICEID_UNDEF = -1, 3823 }; 3824 3825 assert(OutlinedFn && "Invalid outlined function!"); 3826 3827 auto &Ctx = CGF.getContext(); 3828 3829 // Fill up the arrays with the all the captured variables. 
3830 SmallVector<llvm::Value *, 16> BasePointers; 3831 SmallVector<llvm::Value *, 16> Pointers; 3832 SmallVector<llvm::Value *, 16> Sizes; 3833 SmallVector<unsigned, 16> MapTypes; 3834 3835 bool hasVLACaptures = false; 3836 3837 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 3838 auto RI = CS.getCapturedRecordDecl()->field_begin(); 3839 // auto II = CS.capture_init_begin(); 3840 auto CV = CapturedVars.begin(); 3841 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 3842 CE = CS.capture_end(); 3843 CI != CE; ++CI, ++RI, ++CV) { 3844 StringRef Name; 3845 QualType Ty; 3846 llvm::Value *BasePointer; 3847 llvm::Value *Pointer; 3848 llvm::Value *Size; 3849 unsigned MapType; 3850 3851 // VLA sizes are passed to the outlined region by copy. 3852 if (CI->capturesVariableArrayType()) { 3853 BasePointer = Pointer = *CV; 3854 Size = CGF.getTypeSize(RI->getType()); 3855 // Copy to the device as an argument. No need to retrieve it. 3856 MapType = OMP_MAP_BYCOPY; 3857 hasVLACaptures = true; 3858 } else if (CI->capturesThis()) { 3859 BasePointer = Pointer = *CV; 3860 const PointerType *PtrTy = cast<PointerType>(RI->getType().getTypePtr()); 3861 Size = CGF.getTypeSize(PtrTy->getPointeeType()); 3862 // Default map type. 3863 MapType = OMP_MAP_TO | OMP_MAP_FROM; 3864 } else if (CI->capturesVariableByCopy()) { 3865 MapType = OMP_MAP_BYCOPY; 3866 if (!RI->getType()->isAnyPointerType()) { 3867 // If the field is not a pointer, we need to save the actual value and 3868 // load it as a void pointer. 
3869 auto DstAddr = CGF.CreateMemTemp( 3870 Ctx.getUIntPtrType(), 3871 Twine(CI->getCapturedVar()->getName()) + ".casted"); 3872 LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType()); 3873 3874 auto *SrcAddrVal = CGF.EmitScalarConversion( 3875 DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()), 3876 Ctx.getPointerType(RI->getType()), SourceLocation()); 3877 LValue SrcLV = 3878 CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, RI->getType()); 3879 3880 // Store the value using the source type pointer. 3881 CGF.EmitStoreThroughLValue(RValue::get(*CV), SrcLV); 3882 3883 // Load the value using the destination type pointer. 3884 BasePointer = Pointer = 3885 CGF.EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal(); 3886 } else { 3887 MapType |= OMP_MAP_PTR; 3888 BasePointer = Pointer = *CV; 3889 } 3890 Size = CGF.getTypeSize(RI->getType()); 3891 } else { 3892 assert(CI->capturesVariable() && "Expected captured reference."); 3893 BasePointer = Pointer = *CV; 3894 3895 const ReferenceType *PtrTy = 3896 cast<ReferenceType>(RI->getType().getTypePtr()); 3897 QualType ElementType = PtrTy->getPointeeType(); 3898 Size = CGF.getTypeSize(ElementType); 3899 // The default map type for a scalar/complex type is 'to' because by 3900 // default the value doesn't have to be retrieved. For an aggregate type, 3901 // the default is 'tofrom'. 3902 MapType = ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM) 3903 : OMP_MAP_TO; 3904 if (ElementType->isAnyPointerType()) 3905 MapType |= OMP_MAP_PTR; 3906 } 3907 3908 BasePointers.push_back(BasePointer); 3909 Pointers.push_back(Pointer); 3910 Sizes.push_back(Size); 3911 MapTypes.push_back(MapType); 3912 } 3913 3914 // Keep track on whether the host function has to be executed. 
3915 auto OffloadErrorQType = 3916 Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true); 3917 auto OffloadError = CGF.MakeAddrLValue( 3918 CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"), 3919 OffloadErrorQType); 3920 CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), 3921 OffloadError); 3922 3923 // Fill up the pointer arrays and transfer execution to the device. 3924 auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, 3925 hasVLACaptures, Device, OutlinedFnID, OffloadError, 3926 OffloadErrorQType](CodeGenFunction &CGF) { 3927 unsigned PointerNumVal = BasePointers.size(); 3928 llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal); 3929 llvm::Value *BasePointersArray; 3930 llvm::Value *PointersArray; 3931 llvm::Value *SizesArray; 3932 llvm::Value *MapTypesArray; 3933 3934 if (PointerNumVal) { 3935 llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true); 3936 QualType PointerArrayType = Ctx.getConstantArrayType( 3937 Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, 3938 /*IndexTypeQuals=*/0); 3939 3940 BasePointersArray = 3941 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 3942 PointersArray = 3943 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 3944 3945 // If we don't have any VLA types, we can use a constant array for the map 3946 // sizes, otherwise we need to fill up the arrays as we do for the 3947 // pointers. 3948 if (hasVLACaptures) { 3949 QualType SizeArrayType = Ctx.getConstantArrayType( 3950 Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, 3951 /*IndexTypeQuals=*/0); 3952 SizesArray = 3953 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 3954 } else { 3955 // We expect all the sizes to be constant, so we collect them to create 3956 // a constant array. 
        // All sizes are compile-time constants here (no VLA captures), so
        // materialize them as a single constant global array instead of
        // emitting per-element stores at runtime.
        SmallVector<llvm::Constant *, 16> ConstSizes;
        for (auto S : Sizes)
          ConstSizes.push_back(cast<llvm::Constant>(S));

        auto *SizesArrayInit = llvm::ConstantArray::get(
            llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
        auto *SizesArrayGbl = new llvm::GlobalVariable(
            CGM.getModule(), SizesArrayInit->getType(),
            /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
            SizesArrayInit, ".offload_sizes");
        // Private linkage + unnamed_addr allows identical size arrays from
        // different target regions to be merged by the linker/optimizer.
        SizesArrayGbl->setUnnamedAddr(true);
        SizesArray = SizesArrayGbl;
      }

      // The map types are always constant so we don't need to generate code to
      // fill arrays. Instead, we create an array constant.
      llvm::Constant *MapTypesArrayInit =
          llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
      auto *MapTypesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), MapTypesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          MapTypesArrayInit, ".offload_maptypes");
      MapTypesArrayGbl->setUnnamedAddr(true);
      MapTypesArray = MapTypesArrayGbl;

      // Fill the per-capture slots of the base-pointer and pointer arrays
      // (the arrays themselves are allocated earlier in this function,
      // outside this view).
      for (unsigned i = 0; i < PointerNumVal; ++i) {

        llvm::Value *BPVal = BasePointers[i];
        // The runtime expects every entry as an i8*: pointers are bitcast,
        // captured integer values (e.g. scalars passed by value) are
        // converted with inttoptr.
        if (BPVal->getType()->isPointerTy())
          BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
        else {
          assert(BPVal->getType()->isIntegerTy() &&
                 "If not a pointer, the value type must be an integer.");
          BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
        }
        llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal),
            BasePointersArray, 0, i);
        Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
        CGF.Builder.CreateStore(BPVal, BPAddr);

        llvm::Value *PVal = Pointers[i];
        // Same i8* normalization as for the base pointer above.
        if (PVal->getType()->isPointerTy())
          PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
        else {
          assert(PVal->getType()->isIntegerTy() &&
                 "If not a pointer, the value type must be an integer.");
          PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
        }
        llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
            0, i);
        Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
        CGF.Builder.CreateStore(PVal, PAddr);

        // With VLA captures the sizes are runtime values, so each one must
        // be stored dynamically; otherwise the constant global built above
        // already holds them all and no store is needed.
        if (hasVLACaptures) {
          llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
              llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
              /*Idx0=*/0,
              /*Idx1=*/i);
          Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
          CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(
                                      Sizes[i], CGM.SizeTy, /*isSigned=*/true),
                                  SAddr);
        }
      }

      // Decay each [N x T] array to a pointer to its first element, which is
      // the form the __tgt_target runtime call expects.
      BasePointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray,
          /*Idx0=*/0, /*Idx1=*/0);
      PointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
      SizesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
          /*Idx0=*/0, /*Idx1=*/0);
      MapTypesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.Int32Ty, PointerNumVal), MapTypesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);

    } else {
      // Nothing is captured: pass null arrays to the runtime.
      BasePointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
      PointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
      SizesArray = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
      MapTypesArray =
          llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGM.Int32Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);

    llvm::Value *OffloadingArgs[] = {
        DeviceID, OutlinedFnID, PointerNum, BasePointersArray,
        PointersArray, SizesArray, MapTypesArray};
    auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
                                      OffloadingArgs);

    // Record the runtime's result; a nonzero value makes the code emitted
    // after these lambdas branch to the host fallback.
    CGF.EmitStoreOfScalar(Return, OffloadError);
  };

  // Notify that the host version must be executed.
  // NOTE(review): OffloadErrorQType is captured but not used in this visible
  // body — confirm whether the capture is still required.
  auto &&ElseGen = [this, OffloadError,
                    OffloadErrorQType](CodeGenFunction &CGF) {
    // Store a nonzero "error" so the check below unconditionally selects the
    // host version.
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u),
                          OffloadError);
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on
  // host regardless of the conditional in the if clause if, e.g., the user
  // did not specify any target triples.
  // Select the offloading ("then") or host-only ("else") path, honoring an
  // 'if' clause when present.
  if (OutlinedFnID) {
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
    } else {
      CodeGenFunction::RunCleanupsScope Scope(CGF);
      ThenGen(CGF);
    }
  } else {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    ElseGen(CGF);
  }

  // Check the error code and execute the host version if required.
  auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
  auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
  auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
  auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
  CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

  CGF.EmitBlock(OffloadFailedBlock);
  // Host fallback: invoke the host outlined function with the captured base
  // pointers.
  CGF.Builder.CreateCall(OutlinedFn, BasePointers);
  CGF.EmitBranch(OffloadContBlock);

  CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  return;
}

/// \brief Recursively scan \a S for target regions and emit an outlined
/// device function for each one found. \a ParentName is the mangled name of
/// the enclosing function (or special member) and participates in the unique
/// identification of each offload entry.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // If we find an OMP target directive, codegen the outline function and
  // register the result.
  // FIXME: Add other directives with target when they become supported.
  bool isTargetDirective = isa<OMPTargetDirective>(S);

  if (isTargetDirective) {
    auto *E = cast<OMPExecutableDirective>(S);
    // The entry is keyed by (DeviceID, FileID, ParentName, Line, Column) of
    // the directive's start location.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    unsigned Column;
    getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID,
                             FileID, Line, Column);

    // Is this a target region that should not be emitted as an entry point?
    // If so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(
            DeviceID, FileID, ParentName, Line, Column))
      return;

    llvm::Function *Fn;
    llvm::Constant *Addr;
    emitTargetOutlinedFunction(*E, ParentName, Fn, Addr,
                               /*isOffloadEntry=*/true);
    assert(Fn && Addr && "Target region emission failed.");
    return;
  }

  // Other executable directives: descend into the associated captured
  // statement, if any.
  if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(),
        ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (auto *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);

  return;
}

/// \brief Called when function \a GD is about to be emitted. Returns true if
/// the function must be skipped by regular codegen because, in device mode,
/// only the target regions discovered inside it should be emitted.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  auto &FD = *cast<FunctionDecl>(GD.getDecl());

  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Try to detect target regions in the function.
  scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD));

  // We should not emit any function other than the ones created during the
  // scanning. Therefore, we signal that this function is completely dealt
  // with.
  return true;
}

/// \brief Called when global variable \a GD is about to be emitted. In device
/// mode, scans the variable type's constructors/destructors for target
/// regions and returns true so the global itself is not emitted.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (auto *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    auto *Dtor = RD->getDestructor();
    if (Dtor) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // If we are in target mode we do not emit any global (declare target is not
  // implemented yet). Therefore we signal that GD was processed in this case.
  return true;
}

/// \brief Dispatch to the function or global-variable handler above depending
/// on what kind of declaration \a GD is.
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  auto *VD = GD.getDecl();
  if (isa<FunctionDecl>(VD))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

/// \brief Emit the offload entries gathered for this module and build the
/// registration function for its offloading binary descriptor. May return
/// null if there is nothing to register (behavior of the callee — defined
/// elsewhere — determines this; confirm at the declaration).
llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
  // If we have offloading in the current module, we need to emit the entries
  // now and register the offloading descriptor.
  createOffloadEntriesAndInfoMetadata();

  // Create and register the offloading binary descriptors. This is the main
  // entity that captures all the information about offloading in the current
  // compilation unit.
  return createOffloadingBinaryDescriptorRegistration();
}