1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This provides a class for OpenMP runtime code generation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGOpenMPRuntime.h" 17 #include "CodeGenFunction.h" 18 #include "clang/AST/Decl.h" 19 #include "clang/AST/StmtOpenMP.h" 20 #include "llvm/ADT/ArrayRef.h" 21 #include "llvm/Bitcode/ReaderWriter.h" 22 #include "llvm/IR/CallSite.h" 23 #include "llvm/IR/DerivedTypes.h" 24 #include "llvm/IR/GlobalValue.h" 25 #include "llvm/IR/Value.h" 26 #include "llvm/Support/Format.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <cassert> 29 30 using namespace clang; 31 using namespace CodeGen; 32 33 namespace { 34 /// \brief Base class for handling code generation inside OpenMP regions. 35 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 36 public: 37 /// \brief Kinds of OpenMP regions used in codegen. 38 enum CGOpenMPRegionKind { 39 /// \brief Region with outlined function for standalone 'parallel' 40 /// directive. 41 ParallelOutlinedRegion, 42 /// \brief Region with outlined function for standalone 'task' directive. 43 TaskOutlinedRegion, 44 /// \brief Region for constructs that do not require function outlining, 45 /// like 'for', 'sections', 'atomic' etc. directives. 46 InlinedRegion, 47 /// \brief Region with outlined function for standalone 'target' directive. 
48 TargetRegion, 49 }; 50 51 CGOpenMPRegionInfo(const CapturedStmt &CS, 52 const CGOpenMPRegionKind RegionKind, 53 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 54 bool HasCancel) 55 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 56 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 57 58 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 59 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 60 bool HasCancel) 61 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 62 Kind(Kind), HasCancel(HasCancel) {} 63 64 /// \brief Get a variable or parameter for storing global thread id 65 /// inside OpenMP construct. 66 virtual const VarDecl *getThreadIDVariable() const = 0; 67 68 /// \brief Emit the captured statement body. 69 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 70 71 /// \brief Get an LValue for the current ThreadID variable. 72 /// \return LValue for thread id variable. This LValue always has type int32*. 73 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 74 75 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 76 77 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 78 79 bool hasCancel() const { return HasCancel; } 80 81 static bool classof(const CGCapturedStmtInfo *Info) { 82 return Info->getKind() == CR_OpenMP; 83 } 84 85 protected: 86 CGOpenMPRegionKind RegionKind; 87 const RegionCodeGenTy &CodeGen; 88 OpenMPDirectiveKind Kind; 89 bool HasCancel; 90 }; 91 92 /// \brief API for captured statement code generation in OpenMP constructs. 
93 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo { 94 public: 95 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 96 const RegionCodeGenTy &CodeGen, 97 OpenMPDirectiveKind Kind, bool HasCancel) 98 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 99 HasCancel), 100 ThreadIDVar(ThreadIDVar) { 101 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 102 } 103 /// \brief Get a variable or parameter for storing global thread id 104 /// inside OpenMP construct. 105 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 106 107 /// \brief Get the name of the capture helper. 108 StringRef getHelperName() const override { return ".omp_outlined."; } 109 110 static bool classof(const CGCapturedStmtInfo *Info) { 111 return CGOpenMPRegionInfo::classof(Info) && 112 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 113 ParallelOutlinedRegion; 114 } 115 116 private: 117 /// \brief A variable or parameter storing global thread id for OpenMP 118 /// constructs. 119 const VarDecl *ThreadIDVar; 120 }; 121 122 /// \brief API for captured statement code generation in OpenMP constructs. 123 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo { 124 public: 125 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 126 const VarDecl *ThreadIDVar, 127 const RegionCodeGenTy &CodeGen, 128 OpenMPDirectiveKind Kind, bool HasCancel) 129 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 130 ThreadIDVar(ThreadIDVar) { 131 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 132 } 133 /// \brief Get a variable or parameter for storing global thread id 134 /// inside OpenMP construct. 135 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 136 137 /// \brief Get an LValue for the current ThreadID variable. 138 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 139 140 /// \brief Get the name of the capture helper. 
141 StringRef getHelperName() const override { return ".omp_outlined."; } 142 143 static bool classof(const CGCapturedStmtInfo *Info) { 144 return CGOpenMPRegionInfo::classof(Info) && 145 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 146 TaskOutlinedRegion; 147 } 148 149 private: 150 /// \brief A variable or parameter storing global thread id for OpenMP 151 /// constructs. 152 const VarDecl *ThreadIDVar; 153 }; 154 155 /// \brief API for inlined captured statement code generation in OpenMP 156 /// constructs. 157 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 158 public: 159 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 160 const RegionCodeGenTy &CodeGen, 161 OpenMPDirectiveKind Kind, bool HasCancel) 162 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 163 OldCSI(OldCSI), 164 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 165 // \brief Retrieve the value of the context parameter. 166 llvm::Value *getContextValue() const override { 167 if (OuterRegionInfo) 168 return OuterRegionInfo->getContextValue(); 169 llvm_unreachable("No context value for inlined OpenMP region"); 170 } 171 void setContextValue(llvm::Value *V) override { 172 if (OuterRegionInfo) { 173 OuterRegionInfo->setContextValue(V); 174 return; 175 } 176 llvm_unreachable("No context value for inlined OpenMP region"); 177 } 178 /// \brief Lookup the captured field decl for a variable. 179 const FieldDecl *lookup(const VarDecl *VD) const override { 180 if (OuterRegionInfo) 181 return OuterRegionInfo->lookup(VD); 182 // If there is no outer outlined region,no need to lookup in a list of 183 // captured variables, we can use the original one. 184 return nullptr; 185 } 186 FieldDecl *getThisFieldDecl() const override { 187 if (OuterRegionInfo) 188 return OuterRegionInfo->getThisFieldDecl(); 189 return nullptr; 190 } 191 /// \brief Get a variable or parameter for storing global thread id 192 /// inside OpenMP construct. 
193 const VarDecl *getThreadIDVariable() const override { 194 if (OuterRegionInfo) 195 return OuterRegionInfo->getThreadIDVariable(); 196 return nullptr; 197 } 198 199 /// \brief Get the name of the capture helper. 200 StringRef getHelperName() const override { 201 if (auto *OuterRegionInfo = getOldCSI()) 202 return OuterRegionInfo->getHelperName(); 203 llvm_unreachable("No helper name for inlined OpenMP construct"); 204 } 205 206 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 207 208 static bool classof(const CGCapturedStmtInfo *Info) { 209 return CGOpenMPRegionInfo::classof(Info) && 210 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 211 } 212 213 private: 214 /// \brief CodeGen info about outer OpenMP region. 215 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 216 CGOpenMPRegionInfo *OuterRegionInfo; 217 }; 218 219 /// \brief API for captured statement code generation in OpenMP target 220 /// constructs. For this captures, implicit parameters are used instead of the 221 /// captured fields. The name of the target region has to be unique in a given 222 /// application so it is provided by the client, because only the client has 223 /// the information to generate that. 224 class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo { 225 public: 226 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 227 const RegionCodeGenTy &CodeGen, StringRef HelperName) 228 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 229 /*HasCancel=*/false), 230 HelperName(HelperName) {} 231 232 /// \brief This is unused for target regions because each starts executing 233 /// with a single thread. 234 const VarDecl *getThreadIDVariable() const override { return nullptr; } 235 236 /// \brief Get the name of the capture helper. 
237 StringRef getHelperName() const override { return HelperName; } 238 239 static bool classof(const CGCapturedStmtInfo *Info) { 240 return CGOpenMPRegionInfo::classof(Info) && 241 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 242 } 243 244 private: 245 StringRef HelperName; 246 }; 247 248 /// \brief RAII for emitting code of OpenMP constructs. 249 class InlinedOpenMPRegionRAII { 250 CodeGenFunction &CGF; 251 252 public: 253 /// \brief Constructs region for combined constructs. 254 /// \param CodeGen Code generation sequence for combined directives. Includes 255 /// a list of functions used for code generation of implicitly inlined 256 /// regions. 257 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 258 OpenMPDirectiveKind Kind, bool HasCancel) 259 : CGF(CGF) { 260 // Start emission for the construct. 261 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 262 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 263 } 264 ~InlinedOpenMPRegionRAII() { 265 // Restore original CapturedStmtInfo only if we're done with code emission. 
266 auto *OldCSI = 267 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 268 delete CGF.CapturedStmtInfo; 269 CGF.CapturedStmtInfo = OldCSI; 270 } 271 }; 272 273 } // anonymous namespace 274 275 static LValue emitLoadOfPointerLValue(CodeGenFunction &CGF, Address PtrAddr, 276 QualType Ty) { 277 AlignmentSource Source; 278 CharUnits Align = CGF.getNaturalPointeeTypeAlignment(Ty, &Source); 279 return CGF.MakeAddrLValue(Address(CGF.Builder.CreateLoad(PtrAddr), Align), 280 Ty->getPointeeType(), Source); 281 } 282 283 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 284 return emitLoadOfPointerLValue(CGF, 285 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 286 getThreadIDVariable()->getType()); 287 } 288 289 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 290 if (!CGF.HaveInsertPoint()) 291 return; 292 // 1.2.2 OpenMP Language Terminology 293 // Structured block - An executable statement with a single entry at the 294 // top and a single exit at the bottom. 295 // The point of exit cannot be a branch out of the structured block. 296 // longjmp() and throw() must not violate the entry/exit criteria. 
297 CGF.EHStack.pushTerminate(); 298 { 299 CodeGenFunction::RunCleanupsScope Scope(CGF); 300 CodeGen(CGF); 301 } 302 CGF.EHStack.popTerminate(); 303 } 304 305 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 306 CodeGenFunction &CGF) { 307 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 308 getThreadIDVariable()->getType(), 309 AlignmentSource::Decl); 310 } 311 312 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) 313 : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr), 314 OffloadEntriesInfoManager(CGM) { 315 IdentTy = llvm::StructType::create( 316 "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, 317 CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, 318 CGM.Int8PtrTy /* psource */, nullptr); 319 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 320 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 321 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 322 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 323 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 324 325 loadOffloadInfoMetadata(); 326 } 327 328 void CGOpenMPRuntime::clear() { 329 InternalVars.clear(); 330 } 331 332 // Layout information for ident_t. 333 static CharUnits getIdentAlign(CodeGenModule &CGM) { 334 return CGM.getPointerAlign(); 335 } 336 static CharUnits getIdentSize(CodeGenModule &CGM) { 337 assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign())); 338 return CharUnits::fromQuantity(16) + CGM.getPointerSize(); 339 } 340 static CharUnits getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field) { 341 // All the fields except the last are i32, so this works beautifully. 
342 return unsigned(Field) * CharUnits::fromQuantity(4); 343 } 344 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr, 345 CGOpenMPRuntime::IdentFieldIndex Field, 346 const llvm::Twine &Name = "") { 347 auto Offset = getOffsetOfIdentField(Field); 348 return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name); 349 } 350 351 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( 352 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 353 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 354 assert(ThreadIDVar->getType()->isPointerType() && 355 "thread id variable must be of type kmp_int32 *"); 356 const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 357 CodeGenFunction CGF(CGM, true); 358 bool HasCancel = false; 359 if (auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 360 HasCancel = OPD->hasCancel(); 361 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 362 HasCancel = OPSD->hasCancel(); 363 else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 364 HasCancel = OPFD->hasCancel(); 365 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 366 HasCancel); 367 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 368 return CGF.GenerateOpenMPCapturedStmtFunction(*CS); 369 } 370 371 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( 372 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 373 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 374 assert(!ThreadIDVar->getType()->isPointerType() && 375 "thread id variable must be of type kmp_int32 for tasks"); 376 auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 377 CodeGenFunction CGF(CGM, true); 378 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 379 InnermostKind, 380 cast<OMPTaskDirective>(D).hasCancel()); 381 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 382 return CGF.GenerateCapturedStmtFunction(*CS); 383 } 384 385 Address 
CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { 386 CharUnits Align = getIdentAlign(CGM); 387 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); 388 if (!Entry) { 389 if (!DefaultOpenMPPSource) { 390 // Initialize default location for psource field of ident_t structure of 391 // all ident_t objects. Format is ";file;function;line;column;;". 392 // Taken from 393 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c 394 DefaultOpenMPPSource = 395 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 396 DefaultOpenMPPSource = 397 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 398 } 399 auto DefaultOpenMPLocation = new llvm::GlobalVariable( 400 CGM.getModule(), IdentTy, /*isConstant*/ true, 401 llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr); 402 DefaultOpenMPLocation->setUnnamedAddr(true); 403 DefaultOpenMPLocation->setAlignment(Align.getQuantity()); 404 405 llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true); 406 llvm::Constant *Values[] = {Zero, 407 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 408 Zero, Zero, DefaultOpenMPPSource}; 409 llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values); 410 DefaultOpenMPLocation->setInitializer(Init); 411 OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation; 412 } 413 return Address(Entry, Align); 414 } 415 416 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 417 SourceLocation Loc, 418 OpenMPLocationFlags Flags) { 419 // If no debug info is generated - return global default location. 
420 if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo || 421 Loc.isInvalid()) 422 return getOrCreateDefaultLocation(Flags).getPointer(); 423 424 assert(CGF.CurFn && "No function in current CodeGenFunction."); 425 426 Address LocValue = Address::invalid(); 427 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 428 if (I != OpenMPLocThreadIDMap.end()) 429 LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM)); 430 431 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 432 // GetOpenMPThreadID was called before this routine. 433 if (!LocValue.isValid()) { 434 // Generate "ident_t .kmpc_loc.addr;" 435 Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM), 436 ".kmpc_loc.addr"); 437 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 438 Elem.second.DebugLoc = AI.getPointer(); 439 LocValue = AI; 440 441 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 442 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 443 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 444 CGM.getSize(getIdentSize(CGF.CGM))); 445 } 446 447 // char **psource = &.kmpc_loc_<flags>.addr.psource; 448 Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource); 449 450 auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 451 if (OMPDebugLoc == nullptr) { 452 SmallString<128> Buffer2; 453 llvm::raw_svector_ostream OS2(Buffer2); 454 // Build debug location 455 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 456 OS2 << ";" << PLoc.getFilename() << ";"; 457 if (const FunctionDecl *FD = 458 dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) { 459 OS2 << FD->getQualifiedNameAsString(); 460 } 461 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 462 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 463 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 464 } 465 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 466 
CGF.Builder.CreateStore(OMPDebugLoc, PSource); 467 468 // Our callers always pass this to a runtime function, so for 469 // convenience, go ahead and return a naked pointer. 470 return LocValue.getPointer(); 471 } 472 473 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 474 SourceLocation Loc) { 475 assert(CGF.CurFn && "No function in current CodeGenFunction."); 476 477 llvm::Value *ThreadID = nullptr; 478 // Check whether we've already cached a load of the thread id in this 479 // function. 480 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 481 if (I != OpenMPLocThreadIDMap.end()) { 482 ThreadID = I->second.ThreadID; 483 if (ThreadID != nullptr) 484 return ThreadID; 485 } 486 if (auto OMPRegionInfo = 487 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 488 if (OMPRegionInfo->getThreadIDVariable()) { 489 // Check if this an outlined function with thread id passed as argument. 490 auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 491 ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); 492 // If value loaded in entry block, cache it and use it everywhere in 493 // function. 494 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 495 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 496 Elem.second.ThreadID = ThreadID; 497 } 498 return ThreadID; 499 } 500 } 501 502 // This is not an outlined function region - need to call __kmpc_int32 503 // kmpc_global_thread_num(ident_t *loc). 504 // Generate thread id value and cache this value for use across the 505 // function. 
506 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 507 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 508 ThreadID = 509 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 510 emitUpdateLocation(CGF, Loc)); 511 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 512 Elem.second.ThreadID = ThreadID; 513 return ThreadID; 514 } 515 516 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 517 assert(CGF.CurFn && "No function in current CodeGenFunction."); 518 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 519 OpenMPLocThreadIDMap.erase(CGF.CurFn); 520 } 521 522 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 523 return llvm::PointerType::getUnqual(IdentTy); 524 } 525 526 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 527 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 528 } 529 530 llvm::Constant * 531 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { 532 llvm::Constant *RTLFn = nullptr; 533 switch (Function) { 534 case OMPRTL__kmpc_fork_call: { 535 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 536 // microtask, ...); 537 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 538 getKmpc_MicroPointerTy()}; 539 llvm::FunctionType *FnTy = 540 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 541 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 542 break; 543 } 544 case OMPRTL__kmpc_global_thread_num: { 545 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 546 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 547 llvm::FunctionType *FnTy = 548 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 549 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 550 break; 551 } 552 case OMPRTL__kmpc_threadprivate_cached: { 553 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 554 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 555 llvm::Type *TypeParams[] = 
{getIdentTyPointerTy(), CGM.Int32Ty, 556 CGM.VoidPtrTy, CGM.SizeTy, 557 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 558 llvm::FunctionType *FnTy = 559 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 560 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 561 break; 562 } 563 case OMPRTL__kmpc_critical: { 564 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 565 // kmp_critical_name *crit); 566 llvm::Type *TypeParams[] = { 567 getIdentTyPointerTy(), CGM.Int32Ty, 568 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 569 llvm::FunctionType *FnTy = 570 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 571 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 572 break; 573 } 574 case OMPRTL__kmpc_critical_with_hint: { 575 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 576 // kmp_critical_name *crit, uintptr_t hint); 577 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 578 llvm::PointerType::getUnqual(KmpCriticalNameTy), 579 CGM.IntPtrTy}; 580 llvm::FunctionType *FnTy = 581 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 582 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); 583 break; 584 } 585 case OMPRTL__kmpc_threadprivate_register: { 586 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 587 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 588 // typedef void *(*kmpc_ctor)(void *); 589 auto KmpcCtorTy = 590 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 591 /*isVarArg*/ false)->getPointerTo(); 592 // typedef void *(*kmpc_cctor)(void *, void *); 593 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 594 auto KmpcCopyCtorTy = 595 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 596 /*isVarArg*/ false)->getPointerTo(); 597 // typedef void (*kmpc_dtor)(void *); 598 auto KmpcDtorTy = 599 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 
/*isVarArg*/ false) 600 ->getPointerTo(); 601 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 602 KmpcCopyCtorTy, KmpcDtorTy}; 603 auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 604 /*isVarArg*/ false); 605 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 606 break; 607 } 608 case OMPRTL__kmpc_end_critical: { 609 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 610 // kmp_critical_name *crit); 611 llvm::Type *TypeParams[] = { 612 getIdentTyPointerTy(), CGM.Int32Ty, 613 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 614 llvm::FunctionType *FnTy = 615 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 616 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 617 break; 618 } 619 case OMPRTL__kmpc_cancel_barrier: { 620 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 621 // global_tid); 622 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 623 llvm::FunctionType *FnTy = 624 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 625 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 626 break; 627 } 628 case OMPRTL__kmpc_barrier: { 629 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 630 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 631 llvm::FunctionType *FnTy = 632 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 633 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 634 break; 635 } 636 case OMPRTL__kmpc_for_static_fini: { 637 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 638 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 639 llvm::FunctionType *FnTy = 640 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 641 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 642 break; 643 } 644 case OMPRTL__kmpc_push_num_threads: { 645 // Build 
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 646 // kmp_int32 num_threads) 647 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 648 CGM.Int32Ty}; 649 llvm::FunctionType *FnTy = 650 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 651 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 652 break; 653 } 654 case OMPRTL__kmpc_serialized_parallel: { 655 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 656 // global_tid); 657 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 658 llvm::FunctionType *FnTy = 659 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 660 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 661 break; 662 } 663 case OMPRTL__kmpc_end_serialized_parallel: { 664 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 665 // global_tid); 666 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 667 llvm::FunctionType *FnTy = 668 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 669 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 670 break; 671 } 672 case OMPRTL__kmpc_flush: { 673 // Build void __kmpc_flush(ident_t *loc); 674 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 675 llvm::FunctionType *FnTy = 676 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 677 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 678 break; 679 } 680 case OMPRTL__kmpc_master: { 681 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 682 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 683 llvm::FunctionType *FnTy = 684 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 685 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 686 break; 687 } 688 case OMPRTL__kmpc_end_master: { 689 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 690 llvm::Type 
*TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 691 llvm::FunctionType *FnTy = 692 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 693 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 694 break; 695 } 696 case OMPRTL__kmpc_omp_taskyield: { 697 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 698 // int end_part); 699 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 700 llvm::FunctionType *FnTy = 701 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 702 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 703 break; 704 } 705 case OMPRTL__kmpc_single: { 706 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 707 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 708 llvm::FunctionType *FnTy = 709 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 710 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 711 break; 712 } 713 case OMPRTL__kmpc_end_single: { 714 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 715 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 716 llvm::FunctionType *FnTy = 717 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 718 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 719 break; 720 } 721 case OMPRTL__kmpc_omp_task_alloc: { 722 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 723 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 724 // kmp_routine_entry_t *task_entry); 725 assert(KmpRoutineEntryPtrTy != nullptr && 726 "Type kmp_routine_entry_t must be created."); 727 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 728 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 729 // Return void * and then cast to particular kmp_task_t type. 
730 llvm::FunctionType *FnTy = 731 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 732 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 733 break; 734 } 735 case OMPRTL__kmpc_omp_task: { 736 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 737 // *new_task); 738 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 739 CGM.VoidPtrTy}; 740 llvm::FunctionType *FnTy = 741 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 742 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 743 break; 744 } 745 case OMPRTL__kmpc_copyprivate: { 746 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 747 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 748 // kmp_int32 didit); 749 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 750 auto *CpyFnTy = 751 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 752 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 753 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 754 CGM.Int32Ty}; 755 llvm::FunctionType *FnTy = 756 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 757 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 758 break; 759 } 760 case OMPRTL__kmpc_reduce: { 761 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 762 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 763 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 764 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 765 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 766 /*isVarArg=*/false); 767 llvm::Type *TypeParams[] = { 768 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 769 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 770 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 771 llvm::FunctionType *FnTy = 772 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 773 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 774 break; 775 } 776 case OMPRTL__kmpc_reduce_nowait: { 777 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 778 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 779 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 780 // *lck); 781 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 782 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 783 /*isVarArg=*/false); 784 llvm::Type *TypeParams[] = { 785 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 786 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 787 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 788 llvm::FunctionType *FnTy = 789 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 790 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 791 break; 792 } 793 case OMPRTL__kmpc_end_reduce: { 794 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 795 // kmp_critical_name *lck); 796 llvm::Type *TypeParams[] = { 797 getIdentTyPointerTy(), CGM.Int32Ty, 798 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 799 llvm::FunctionType *FnTy = 800 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 801 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 802 break; 803 } 804 case OMPRTL__kmpc_end_reduce_nowait: { 805 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 806 // kmp_critical_name *lck); 807 llvm::Type *TypeParams[] = { 808 getIdentTyPointerTy(), CGM.Int32Ty, 809 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 810 llvm::FunctionType *FnTy = 811 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 812 RTLFn = 813 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 814 break; 815 } 816 case OMPRTL__kmpc_omp_task_begin_if0: { 817 
// Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 818 // *new_task); 819 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 820 CGM.VoidPtrTy}; 821 llvm::FunctionType *FnTy = 822 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 823 RTLFn = 824 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 825 break; 826 } 827 case OMPRTL__kmpc_omp_task_complete_if0: { 828 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 829 // *new_task); 830 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 831 CGM.VoidPtrTy}; 832 llvm::FunctionType *FnTy = 833 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 834 RTLFn = CGM.CreateRuntimeFunction(FnTy, 835 /*Name=*/"__kmpc_omp_task_complete_if0"); 836 break; 837 } 838 case OMPRTL__kmpc_ordered: { 839 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 840 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 841 llvm::FunctionType *FnTy = 842 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 843 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 844 break; 845 } 846 case OMPRTL__kmpc_end_ordered: { 847 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 848 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 849 llvm::FunctionType *FnTy = 850 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 851 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 852 break; 853 } 854 case OMPRTL__kmpc_omp_taskwait: { 855 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 856 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 857 llvm::FunctionType *FnTy = 858 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 859 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 860 break; 861 } 862 case OMPRTL__kmpc_taskgroup: { 863 // Build void __kmpc_taskgroup(ident_t 
*loc, kmp_int32 global_tid); 864 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 865 llvm::FunctionType *FnTy = 866 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 867 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 868 break; 869 } 870 case OMPRTL__kmpc_end_taskgroup: { 871 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 872 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 873 llvm::FunctionType *FnTy = 874 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 875 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 876 break; 877 } 878 case OMPRTL__kmpc_push_proc_bind: { 879 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 880 // int proc_bind) 881 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 882 llvm::FunctionType *FnTy = 883 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 884 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 885 break; 886 } 887 case OMPRTL__kmpc_omp_task_with_deps: { 888 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 889 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 890 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 891 llvm::Type *TypeParams[] = { 892 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 893 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 894 llvm::FunctionType *FnTy = 895 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 896 RTLFn = 897 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 898 break; 899 } 900 case OMPRTL__kmpc_omp_wait_deps: { 901 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 902 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 903 // kmp_depend_info_t *noalias_dep_list); 904 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 905 
CGM.Int32Ty, CGM.VoidPtrTy, 906 CGM.Int32Ty, CGM.VoidPtrTy}; 907 llvm::FunctionType *FnTy = 908 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 909 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 910 break; 911 } 912 case OMPRTL__kmpc_cancellationpoint: { 913 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 914 // global_tid, kmp_int32 cncl_kind) 915 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 916 llvm::FunctionType *FnTy = 917 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 918 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 919 break; 920 } 921 case OMPRTL__kmpc_cancel: { 922 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 923 // kmp_int32 cncl_kind) 924 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 925 llvm::FunctionType *FnTy = 926 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 927 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 928 break; 929 } 930 case OMPRTL__tgt_target: { 931 // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t 932 // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t 933 // *arg_types); 934 llvm::Type *TypeParams[] = {CGM.Int32Ty, 935 CGM.VoidPtrTy, 936 CGM.Int32Ty, 937 CGM.VoidPtrPtrTy, 938 CGM.VoidPtrPtrTy, 939 CGM.SizeTy->getPointerTo(), 940 CGM.Int32Ty->getPointerTo()}; 941 llvm::FunctionType *FnTy = 942 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 943 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 944 break; 945 } 946 case OMPRTL__tgt_register_lib: { 947 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 948 QualType ParamTy = 949 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 950 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 951 llvm::FunctionType *FnTy = 952 llvm::FunctionType::get(CGM.Int32Ty, 
TypeParams, /*isVarArg*/ false); 953 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 954 break; 955 } 956 case OMPRTL__tgt_unregister_lib: { 957 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 958 QualType ParamTy = 959 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 960 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 961 llvm::FunctionType *FnTy = 962 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 963 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 964 break; 965 } 966 } 967 return RTLFn; 968 } 969 970 static llvm::Value *getTypeSize(CodeGenFunction &CGF, QualType Ty) { 971 auto &C = CGF.getContext(); 972 llvm::Value *Size = nullptr; 973 auto SizeInChars = C.getTypeSizeInChars(Ty); 974 if (SizeInChars.isZero()) { 975 // getTypeSizeInChars() returns 0 for a VLA. 976 while (auto *VAT = C.getAsVariableArrayType(Ty)) { 977 llvm::Value *ArraySize; 978 std::tie(ArraySize, Ty) = CGF.getVLASize(VAT); 979 Size = Size ? CGF.Builder.CreateNUWMul(Size, ArraySize) : ArraySize; 980 } 981 SizeInChars = C.getTypeSizeInChars(Ty); 982 assert(!SizeInChars.isZero()); 983 Size = CGF.Builder.CreateNUWMul( 984 Size, llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity())); 985 } else 986 Size = llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity()); 987 return Size; 988 } 989 990 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, 991 bool IVSigned) { 992 assert((IVSize == 32 || IVSize == 64) && 993 "IV size is not compatible with the omp runtime"); 994 auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 995 : "__kmpc_for_static_init_4u") 996 : (IVSigned ? "__kmpc_for_static_init_8" 997 : "__kmpc_for_static_init_8u"); 998 auto ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 999 auto PtrTy = llvm::PointerType::getUnqual(ITy); 1000 llvm::Type *TypeParams[] = { 1001 getIdentTyPointerTy(), // loc 1002 CGM.Int32Ty, // tid 1003 CGM.Int32Ty, // schedtype 1004 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1005 PtrTy, // p_lower 1006 PtrTy, // p_upper 1007 PtrTy, // p_stride 1008 ITy, // incr 1009 ITy // chunk 1010 }; 1011 llvm::FunctionType *FnTy = 1012 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1013 return CGM.CreateRuntimeFunction(FnTy, Name); 1014 } 1015 1016 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, 1017 bool IVSigned) { 1018 assert((IVSize == 32 || IVSize == 64) && 1019 "IV size is not compatible with the omp runtime"); 1020 auto Name = 1021 IVSize == 32 1022 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1023 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1024 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1025 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1026 CGM.Int32Ty, // tid 1027 CGM.Int32Ty, // schedtype 1028 ITy, // lower 1029 ITy, // upper 1030 ITy, // stride 1031 ITy // chunk 1032 }; 1033 llvm::FunctionType *FnTy = 1034 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1035 return CGM.CreateRuntimeFunction(FnTy, Name); 1036 } 1037 1038 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, 1039 bool IVSigned) { 1040 assert((IVSize == 32 || IVSize == 64) && 1041 "IV size is not compatible with the omp runtime"); 1042 auto Name = 1043 IVSize == 32 1044 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1045 : (IVSigned ? 
"__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1046 llvm::Type *TypeParams[] = { 1047 getIdentTyPointerTy(), // loc 1048 CGM.Int32Ty, // tid 1049 }; 1050 llvm::FunctionType *FnTy = 1051 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1052 return CGM.CreateRuntimeFunction(FnTy, Name); 1053 } 1054 1055 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, 1056 bool IVSigned) { 1057 assert((IVSize == 32 || IVSize == 64) && 1058 "IV size is not compatible with the omp runtime"); 1059 auto Name = 1060 IVSize == 32 1061 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1062 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1063 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1064 auto PtrTy = llvm::PointerType::getUnqual(ITy); 1065 llvm::Type *TypeParams[] = { 1066 getIdentTyPointerTy(), // loc 1067 CGM.Int32Ty, // tid 1068 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1069 PtrTy, // p_lower 1070 PtrTy, // p_upper 1071 PtrTy // p_stride 1072 }; 1073 llvm::FunctionType *FnTy = 1074 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1075 return CGM.CreateRuntimeFunction(FnTy, Name); 1076 } 1077 1078 llvm::Constant * 1079 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1080 assert(!CGM.getLangOpts().OpenMPUseTLS || 1081 !CGM.getContext().getTargetInfo().isTLSSupported()); 1082 // Lookup the entry, lazily creating it if necessary. 
1083 return getOrCreateInternalVariable(CGM.Int8PtrPtrTy, 1084 Twine(CGM.getMangledName(VD)) + ".cache."); 1085 } 1086 1087 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1088 const VarDecl *VD, 1089 Address VDAddr, 1090 SourceLocation Loc) { 1091 if (CGM.getLangOpts().OpenMPUseTLS && 1092 CGM.getContext().getTargetInfo().isTLSSupported()) 1093 return VDAddr; 1094 1095 auto VarTy = VDAddr.getElementType(); 1096 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1097 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1098 CGM.Int8PtrTy), 1099 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1100 getOrCreateThreadPrivateCache(VD)}; 1101 return Address(CGF.EmitRuntimeCall( 1102 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 1103 VDAddr.getAlignment()); 1104 } 1105 1106 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1107 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1108 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1109 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1110 // library. 1111 auto OMPLoc = emitUpdateLocation(CGF, Loc); 1112 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1113 OMPLoc); 1114 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1115 // to register constructor/destructor for variable. 
1116 llvm::Value *Args[] = {OMPLoc, 1117 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1118 CGM.VoidPtrTy), 1119 Ctor, CopyCtor, Dtor}; 1120 CGF.EmitRuntimeCall( 1121 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 1122 } 1123 1124 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1125 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1126 bool PerformInit, CodeGenFunction *CGF) { 1127 if (CGM.getLangOpts().OpenMPUseTLS && 1128 CGM.getContext().getTargetInfo().isTLSSupported()) 1129 return nullptr; 1130 1131 VD = VD->getDefinition(CGM.getContext()); 1132 if (VD && ThreadPrivateWithDefinition.count(VD) == 0) { 1133 ThreadPrivateWithDefinition.insert(VD); 1134 QualType ASTTy = VD->getType(); 1135 1136 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1137 auto Init = VD->getAnyInitializer(); 1138 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1139 // Generate function that re-emits the declaration's initializer into the 1140 // threadprivate copy of the variable VD 1141 CodeGenFunction CtorCGF(CGM); 1142 FunctionArgList Args; 1143 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 1144 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 1145 Args.push_back(&Dst); 1146 1147 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( 1148 CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(), 1149 /*isVariadic=*/false); 1150 auto FTy = CGM.getTypes().GetFunctionType(FI); 1151 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 1152 FTy, ".__kmpc_global_ctor_.", FI, Loc); 1153 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1154 Args, SourceLocation()); 1155 auto ArgVal = CtorCGF.EmitLoadOfScalar( 1156 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1157 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1158 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1159 Arg = CtorCGF.Builder.CreateElementBitCast(Arg, 1160 CtorCGF.ConvertTypeForMem(ASTTy)); 1161 
CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1162 /*IsInitializer=*/true); 1163 ArgVal = CtorCGF.EmitLoadOfScalar( 1164 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1165 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1166 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1167 CtorCGF.FinishFunction(); 1168 Ctor = Fn; 1169 } 1170 if (VD->getType().isDestructedType() != QualType::DK_none) { 1171 // Generate function that emits destructor call for the threadprivate copy 1172 // of the variable VD 1173 CodeGenFunction DtorCGF(CGM); 1174 FunctionArgList Args; 1175 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 1176 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 1177 Args.push_back(&Dst); 1178 1179 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( 1180 CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(), 1181 /*isVariadic=*/false); 1182 auto FTy = CGM.getTypes().GetFunctionType(FI); 1183 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 1184 FTy, ".__kmpc_global_dtor_.", FI, Loc); 1185 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1186 SourceLocation()); 1187 auto ArgVal = DtorCGF.EmitLoadOfScalar( 1188 DtorCGF.GetAddrOfLocalVar(&Dst), 1189 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1190 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 1191 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1192 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1193 DtorCGF.FinishFunction(); 1194 Dtor = Fn; 1195 } 1196 // Do not emit init function if it is not required. 1197 if (!Ctor && !Dtor) 1198 return nullptr; 1199 1200 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1201 auto CopyCtorTy = 1202 llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1203 /*isVarArg=*/false)->getPointerTo(); 1204 // Copying constructor for the threadprivate variable. 
1205 // Must be NULL - reserved by runtime, but currently it requires that this 1206 // parameter is always NULL. Otherwise it fires assertion. 1207 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1208 if (Ctor == nullptr) { 1209 auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1210 /*isVarArg=*/false)->getPointerTo(); 1211 Ctor = llvm::Constant::getNullValue(CtorTy); 1212 } 1213 if (Dtor == nullptr) { 1214 auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1215 /*isVarArg=*/false)->getPointerTo(); 1216 Dtor = llvm::Constant::getNullValue(DtorTy); 1217 } 1218 if (!CGF) { 1219 auto InitFunctionTy = 1220 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1221 auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( 1222 InitFunctionTy, ".__omp_threadprivate_init_.", 1223 CGM.getTypes().arrangeNullaryFunction()); 1224 CodeGenFunction InitCGF(CGM); 1225 FunctionArgList ArgList; 1226 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1227 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1228 Loc); 1229 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1230 InitCGF.FinishFunction(); 1231 return InitFunction; 1232 } 1233 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1234 } 1235 return nullptr; 1236 } 1237 1238 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen 1239 /// function. Here is the logic: 1240 /// if (Cond) { 1241 /// ThenGen(); 1242 /// } else { 1243 /// ElseGen(); 1244 /// } 1245 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 1246 const RegionCodeGenTy &ThenGen, 1247 const RegionCodeGenTy &ElseGen) { 1248 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 1249 1250 // If the condition constant folds and can be elided, try to avoid emitting 1251 // the condition and the dead arm of the if/else. 
1252 bool CondConstant; 1253 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 1254 CodeGenFunction::RunCleanupsScope Scope(CGF); 1255 if (CondConstant) { 1256 ThenGen(CGF); 1257 } else { 1258 ElseGen(CGF); 1259 } 1260 return; 1261 } 1262 1263 // Otherwise, the condition did not fold, or we couldn't elide it. Just 1264 // emit the conditional branch. 1265 auto ThenBlock = CGF.createBasicBlock("omp_if.then"); 1266 auto ElseBlock = CGF.createBasicBlock("omp_if.else"); 1267 auto ContBlock = CGF.createBasicBlock("omp_if.end"); 1268 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 1269 1270 // Emit the 'then' code. 1271 CGF.EmitBlock(ThenBlock); 1272 { 1273 CodeGenFunction::RunCleanupsScope ThenScope(CGF); 1274 ThenGen(CGF); 1275 } 1276 CGF.EmitBranch(ContBlock); 1277 // Emit the 'else' code if present. 1278 { 1279 // There is no need to emit line number for unconditional branch. 1280 auto NL = ApplyDebugLocation::CreateEmpty(CGF); 1281 CGF.EmitBlock(ElseBlock); 1282 } 1283 { 1284 CodeGenFunction::RunCleanupsScope ThenScope(CGF); 1285 ElseGen(CGF); 1286 } 1287 { 1288 // There is no need to emit line number for unconditional branch. 1289 auto NL = ApplyDebugLocation::CreateEmpty(CGF); 1290 CGF.EmitBranch(ContBlock); 1291 } 1292 // Emit the continuation block for code after the if. 
1293 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 1294 } 1295 1296 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 1297 llvm::Value *OutlinedFn, 1298 ArrayRef<llvm::Value *> CapturedVars, 1299 const Expr *IfCond) { 1300 if (!CGF.HaveInsertPoint()) 1301 return; 1302 auto *RTLoc = emitUpdateLocation(CGF, Loc); 1303 auto &&ThenGen = [this, OutlinedFn, CapturedVars, 1304 RTLoc](CodeGenFunction &CGF) { 1305 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 1306 llvm::Value *Args[] = { 1307 RTLoc, 1308 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 1309 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 1310 llvm::SmallVector<llvm::Value *, 16> RealArgs; 1311 RealArgs.append(std::begin(Args), std::end(Args)); 1312 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 1313 1314 auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call); 1315 CGF.EmitRuntimeCall(RTLFn, RealArgs); 1316 }; 1317 auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc, 1318 Loc](CodeGenFunction &CGF) { 1319 auto ThreadID = getThreadID(CGF, Loc); 1320 // Build calls: 1321 // __kmpc_serialized_parallel(&Loc, GTid); 1322 llvm::Value *Args[] = {RTLoc, ThreadID}; 1323 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), 1324 Args); 1325 1326 // OutlinedFn(>id, &zero, CapturedStruct); 1327 auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc); 1328 Address ZeroAddr = 1329 CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), 1330 /*Name*/ ".zero.addr"); 1331 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 1332 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 1333 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 1334 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 1335 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 1336 CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); 1337 1338 // __kmpc_end_serialized_parallel(&Loc, GTid); 1339 
llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID}; 1340 CGF.EmitRuntimeCall( 1341 createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs); 1342 }; 1343 if (IfCond) { 1344 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 1345 } else { 1346 CodeGenFunction::RunCleanupsScope Scope(CGF); 1347 ThenGen(CGF); 1348 } 1349 } 1350 1351 // If we're inside an (outlined) parallel region, use the region info's 1352 // thread-ID variable (it is passed in a first argument of the outlined function 1353 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 1354 // regular serial code region, get thread ID by calling kmp_int32 1355 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 1356 // return the address of that temp. 1357 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 1358 SourceLocation Loc) { 1359 if (auto OMPRegionInfo = 1360 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 1361 if (OMPRegionInfo->getThreadIDVariable()) 1362 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 1363 1364 auto ThreadID = getThreadID(CGF, Loc); 1365 auto Int32Ty = 1366 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 1367 auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 1368 CGF.EmitStoreOfScalar(ThreadID, 1369 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 1370 1371 return ThreadIDTemp; 1372 } 1373 1374 llvm::Constant * 1375 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, 1376 const llvm::Twine &Name) { 1377 SmallString<256> Buffer; 1378 llvm::raw_svector_ostream Out(Buffer); 1379 Out << Name; 1380 auto RuntimeName = Out.str(); 1381 auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first; 1382 if (Elem.second) { 1383 assert(Elem.second->getType()->getPointerElementType() == Ty && 1384 "OMP internal variable has different type than requested"); 1385 return &*Elem.second; 1386 } 1387 
1388 return Elem.second = new llvm::GlobalVariable( 1389 CGM.getModule(), Ty, /*IsConstant*/ false, 1390 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 1391 Elem.first()); 1392 } 1393 1394 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 1395 llvm::Twine Name(".gomp_critical_user_", CriticalName); 1396 return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); 1397 } 1398 1399 namespace { 1400 template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup { 1401 llvm::Value *Callee; 1402 llvm::Value *Args[N]; 1403 1404 public: 1405 CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs) 1406 : Callee(Callee) { 1407 assert(CleanupArgs.size() == N); 1408 std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args)); 1409 } 1410 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 1411 if (!CGF.HaveInsertPoint()) 1412 return; 1413 CGF.EmitRuntimeCall(Callee, Args); 1414 } 1415 }; 1416 } // anonymous namespace 1417 1418 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 1419 StringRef CriticalName, 1420 const RegionCodeGenTy &CriticalOpGen, 1421 SourceLocation Loc, const Expr *Hint) { 1422 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 1423 // CriticalOpGen(); 1424 // __kmpc_end_critical(ident_t *, gtid, Lock); 1425 // Prepare arguments and build a call to __kmpc_critical 1426 if (!CGF.HaveInsertPoint()) 1427 return; 1428 CodeGenFunction::RunCleanupsScope Scope(CGF); 1429 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1430 getCriticalRegionLock(CriticalName)}; 1431 if (Hint) { 1432 llvm::SmallVector<llvm::Value *, 8> ArgsWithHint(std::begin(Args), 1433 std::end(Args)); 1434 auto *HintVal = CGF.EmitScalarExpr(Hint); 1435 ArgsWithHint.push_back( 1436 CGF.Builder.CreateIntCast(HintVal, CGM.IntPtrTy, /*isSigned=*/false)); 1437 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical_with_hint), 1438 
ArgsWithHint); 1439 } else 1440 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args); 1441 // Build a call to __kmpc_end_critical 1442 CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( 1443 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical), 1444 llvm::makeArrayRef(Args)); 1445 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 1446 } 1447 1448 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond, 1449 OpenMPDirectiveKind Kind, SourceLocation Loc, 1450 const RegionCodeGenTy &BodyOpGen) { 1451 llvm::Value *CallBool = CGF.EmitScalarConversion( 1452 IfCond, 1453 CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true), 1454 CGF.getContext().BoolTy, Loc); 1455 1456 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 1457 auto *ContBlock = CGF.createBasicBlock("omp_if.end"); 1458 // Generate the branch (If-stmt) 1459 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 1460 CGF.EmitBlock(ThenBlock); 1461 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen); 1462 // Emit the rest of bblocks/branches 1463 CGF.EmitBranch(ContBlock); 1464 CGF.EmitBlock(ContBlock, true); 1465 } 1466 1467 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 1468 const RegionCodeGenTy &MasterOpGen, 1469 SourceLocation Loc) { 1470 if (!CGF.HaveInsertPoint()) 1471 return; 1472 // if(__kmpc_master(ident_t *, gtid)) { 1473 // MasterOpGen(); 1474 // __kmpc_end_master(ident_t *, gtid); 1475 // } 1476 // Prepare arguments and build a call to __kmpc_master 1477 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1478 auto *IsMaster = 1479 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args); 1480 typedef CallEndCleanup<std::extent<decltype(Args)>::value> 1481 MasterCallEndCleanup; 1482 emitIfStmt( 1483 CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void { 1484 CodeGenFunction::RunCleanupsScope Scope(CGF); 1485 
CGF.EHStack.pushCleanup<MasterCallEndCleanup>( 1486 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master), 1487 llvm::makeArrayRef(Args)); 1488 MasterOpGen(CGF); 1489 }); 1490 } 1491 1492 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 1493 SourceLocation Loc) { 1494 if (!CGF.HaveInsertPoint()) 1495 return; 1496 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 1497 llvm::Value *Args[] = { 1498 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1499 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 1500 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 1501 } 1502 1503 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 1504 const RegionCodeGenTy &TaskgroupOpGen, 1505 SourceLocation Loc) { 1506 if (!CGF.HaveInsertPoint()) 1507 return; 1508 // __kmpc_taskgroup(ident_t *, gtid); 1509 // TaskgroupOpGen(); 1510 // __kmpc_end_taskgroup(ident_t *, gtid); 1511 // Prepare arguments and build a call to __kmpc_taskgroup 1512 { 1513 CodeGenFunction::RunCleanupsScope Scope(CGF); 1514 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1515 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args); 1516 // Build a call to __kmpc_end_taskgroup 1517 CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( 1518 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 1519 llvm::makeArrayRef(Args)); 1520 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 1521 } 1522 } 1523 1524 /// Given an array of pointers to variables, project the address of a 1525 /// given variable. 1526 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 1527 unsigned Index, const VarDecl *Var) { 1528 // Pull out the pointer to the variable. 
Address PtrAddr =
      CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// \brief Builds the internal function ".omp.copyprivate.copy_func" with the
/// signature void(void *LHSArg, void *RHSArg).
///
/// Both arguments are cast to \a ArgsType and treated as arrays of pointers
/// to variables. For each index I in [0, AssignmentOps.size()) the generated
/// body copies the variable addressed by element I of the RHS array into the
/// variable addressed by element I of the LHS array via EmitOMPCopy, using
/// AssignmentOps[I].
/// \param CopyprivateVars Expressions whose declarations supply the type of
/// each copied variable.
/// \param DestExprs DeclRefExprs naming the destination VarDecl for each copy.
/// \param SrcExprs DeclRefExprs naming the source VarDecl for each copy.
/// \return The generated function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
  auto &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  FunctionType::ExtInfo EI;
  auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
      C.VoidTy, Args, EI, /*isVariadic=*/false);
  auto *Fn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
      ".omp.copyprivate.copy_func", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // The copied type comes from the copyprivate variable itself.
    auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// \brief Emits a 'single' region, optionally followed by the copyprivate
/// broadcast.
///
/// \a CopyprivateVars, \a SrcExprs, \a DstExprs and \a AssignmentOps must all
/// have the same length (asserted). When \a CopyprivateVars is empty no
/// did_it flag, copy function or __kmpc_copyprivate call is emitted.
///
/// NOTE(review): \a SrcExprs and \a DstExprs are forwarded positionally into
/// emitCopyprivateCopyFunction's (DestExprs, SrcExprs) parameters, so the
/// names are crossed between the two signatures - confirm against the caller
/// which list actually holds the destinations.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  auto &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid when there is nothing to broadcast; its validity is
  // the "copyprivate requested" flag for the rest of this function.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  auto *IsSingle =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
  typedef CallEndCleanup<std::extent<decltype(Args)>::value>
      SingleCallEndCleanup;
  emitIfStmt(
      CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void {
        CodeGenFunction::RunCleanupsScope Scope(CGF);
        // __kmpc_end_single is emitted as a cleanup with the same arguments.
        CGF.EHStack.pushCleanup<SingleCallEndCleanup>(
            NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
            llvm::makeArrayRef(Args));
        SingleOpGen(CGF);
        if (DidIt.isValid()) {
          // did_it = 1;
          CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
        }
      });
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    auto CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(
          CopyprivateList, I, CGF.getPointerSize());
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    auto *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
    auto *BufSize = getTypeSize(CGF, CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
    // This Args shadows the __kmpc_single argument list declared above.
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

/// \brief Emits an 'ordered' region.
///
/// The body is always emitted through emitInlinedDirective. Only when
/// \a IsThreads is true is it bracketed by __kmpc_ordered and (as a
/// normal-and-EH cleanup) __kmpc_end_ordered.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
    // Build a call to __kmpc_end_ordered
    CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
        NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
        llvm::makeArrayRef(Args));
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call
__kmpc_barrier(loc, thread_id); 1703 OpenMPLocationFlags Flags = OMP_IDENT_KMPC; 1704 if (Kind == OMPD_for) { 1705 Flags = 1706 static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR); 1707 } else if (Kind == OMPD_sections) { 1708 Flags = static_cast<OpenMPLocationFlags>(Flags | 1709 OMP_IDENT_BARRIER_IMPL_SECTIONS); 1710 } else if (Kind == OMPD_single) { 1711 Flags = 1712 static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE); 1713 } else if (Kind == OMPD_barrier) { 1714 Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL); 1715 } else { 1716 Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL); 1717 } 1718 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 1719 // thread_id); 1720 auto *OMPRegionInfo = 1721 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 1722 // Do not emit barrier call in the single directive emitted in some rare cases 1723 // for sections directives. 1724 if (OMPRegionInfo && OMPRegionInfo->getDirectiveKind() == OMPD_single) 1725 return; 1726 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 1727 getThreadID(CGF, Loc)}; 1728 if (OMPRegionInfo) { 1729 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 1730 auto *Result = CGF.EmitRuntimeCall( 1731 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 1732 if (EmitChecks) { 1733 // if (__kmpc_cancel_barrier()) { 1734 // exit from construct; 1735 // } 1736 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 1737 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 1738 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 1739 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 1740 CGF.EmitBlock(ExitBB); 1741 // exit from construct; 1742 auto CancelDestination = 1743 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 1744 CGF.EmitBranchThroughCleanup(CancelDestination); 1745 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 1746 } 1747 return; 1748 } 1749 } 1750 
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 1751 } 1752 1753 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from 1754 /// the enum sched_type in kmp.h). 1755 enum OpenMPSchedType { 1756 /// \brief Lower bound for default (unordered) versions. 1757 OMP_sch_lower = 32, 1758 OMP_sch_static_chunked = 33, 1759 OMP_sch_static = 34, 1760 OMP_sch_dynamic_chunked = 35, 1761 OMP_sch_guided_chunked = 36, 1762 OMP_sch_runtime = 37, 1763 OMP_sch_auto = 38, 1764 /// \brief Lower bound for 'ordered' versions. 1765 OMP_ord_lower = 64, 1766 OMP_ord_static_chunked = 65, 1767 OMP_ord_static = 66, 1768 OMP_ord_dynamic_chunked = 67, 1769 OMP_ord_guided_chunked = 68, 1770 OMP_ord_runtime = 69, 1771 OMP_ord_auto = 70, 1772 OMP_sch_default = OMP_sch_static, 1773 }; 1774 1775 /// \brief Map the OpenMP loop schedule to the runtime enumeration. 1776 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 1777 bool Chunked, bool Ordered) { 1778 switch (ScheduleKind) { 1779 case OMPC_SCHEDULE_static: 1780 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 1781 : (Ordered ? OMP_ord_static : OMP_sch_static); 1782 case OMPC_SCHEDULE_dynamic: 1783 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 1784 case OMPC_SCHEDULE_guided: 1785 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 1786 case OMPC_SCHEDULE_runtime: 1787 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 1788 case OMPC_SCHEDULE_auto: 1789 return Ordered ? OMP_ord_auto : OMP_sch_auto; 1790 case OMPC_SCHEDULE_unknown: 1791 assert(!Chunked && "chunk was specified but schedule kind not known"); 1792 return Ordered ? 
OMP_ord_static : OMP_sch_static; 1793 } 1794 llvm_unreachable("Unexpected runtime schedule"); 1795 } 1796 1797 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 1798 bool Chunked) const { 1799 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 1800 return Schedule == OMP_sch_static; 1801 } 1802 1803 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 1804 auto Schedule = 1805 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 1806 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 1807 return Schedule != OMP_sch_static; 1808 } 1809 1810 void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF, 1811 SourceLocation Loc, 1812 OpenMPScheduleClauseKind ScheduleKind, 1813 unsigned IVSize, bool IVSigned, 1814 bool Ordered, llvm::Value *UB, 1815 llvm::Value *Chunk) { 1816 if (!CGF.HaveInsertPoint()) 1817 return; 1818 OpenMPSchedType Schedule = 1819 getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered); 1820 assert(Ordered || 1821 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 1822 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked)); 1823 // Call __kmpc_dispatch_init( 1824 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 1825 // kmp_int[32|64] lower, kmp_int[32|64] upper, 1826 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 1827 1828 // If the Chunk was not specified in the clause - use default value 1. 
1829 if (Chunk == nullptr) 1830 Chunk = CGF.Builder.getIntN(IVSize, 1); 1831 llvm::Value *Args[] = { 1832 emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1833 getThreadID(CGF, Loc), 1834 CGF.Builder.getInt32(Schedule), // Schedule type 1835 CGF.Builder.getIntN(IVSize, 0), // Lower 1836 UB, // Upper 1837 CGF.Builder.getIntN(IVSize, 1), // Stride 1838 Chunk // Chunk 1839 }; 1840 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 1841 } 1842 1843 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 1844 SourceLocation Loc, 1845 OpenMPScheduleClauseKind ScheduleKind, 1846 unsigned IVSize, bool IVSigned, 1847 bool Ordered, Address IL, Address LB, 1848 Address UB, Address ST, 1849 llvm::Value *Chunk) { 1850 if (!CGF.HaveInsertPoint()) 1851 return; 1852 OpenMPSchedType Schedule = 1853 getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered); 1854 assert(!Ordered); 1855 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 1856 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked); 1857 1858 // Call __kmpc_for_static_init( 1859 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 1860 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 1861 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 1862 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 1863 if (Chunk == nullptr) { 1864 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) && 1865 "expected static non-chunked schedule"); 1866 // If the Chunk was not specified in the clause - use default value 1. 
1867 Chunk = CGF.Builder.getIntN(IVSize, 1); 1868 } else { 1869 assert((Schedule == OMP_sch_static_chunked || 1870 Schedule == OMP_ord_static_chunked) && 1871 "expected static chunked schedule"); 1872 } 1873 llvm::Value *Args[] = { 1874 emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1875 getThreadID(CGF, Loc), 1876 CGF.Builder.getInt32(Schedule), // Schedule type 1877 IL.getPointer(), // &isLastIter 1878 LB.getPointer(), // &LB 1879 UB.getPointer(), // &UB 1880 ST.getPointer(), // &Stride 1881 CGF.Builder.getIntN(IVSize, 1), // Incr 1882 Chunk // Chunk 1883 }; 1884 CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args); 1885 } 1886 1887 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 1888 SourceLocation Loc) { 1889 if (!CGF.HaveInsertPoint()) 1890 return; 1891 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 1892 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1893 getThreadID(CGF, Loc)}; 1894 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 1895 Args); 1896 } 1897 1898 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 1899 SourceLocation Loc, 1900 unsigned IVSize, 1901 bool IVSigned) { 1902 if (!CGF.HaveInsertPoint()) 1903 return; 1904 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 1905 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1906 getThreadID(CGF, Loc)}; 1907 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 1908 } 1909 1910 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 1911 SourceLocation Loc, unsigned IVSize, 1912 bool IVSigned, Address IL, 1913 Address LB, Address UB, 1914 Address ST) { 1915 // Call __kmpc_dispatch_next( 1916 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 1917 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 1918 // kmp_int[32|64] *p_stride); 1919 llvm::Value *Args[] = { 1920 emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 
getThreadID(CGF, Loc), 1921 IL.getPointer(), // &isLastIter 1922 LB.getPointer(), // &Lower 1923 UB.getPointer(), // &Upper 1924 ST.getPointer() // &Stride 1925 }; 1926 llvm::Value *Call = 1927 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 1928 return CGF.EmitScalarConversion( 1929 Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true), 1930 CGF.getContext().BoolTy, Loc); 1931 } 1932 1933 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 1934 llvm::Value *NumThreads, 1935 SourceLocation Loc) { 1936 if (!CGF.HaveInsertPoint()) 1937 return; 1938 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 1939 llvm::Value *Args[] = { 1940 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1941 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 1942 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 1943 Args); 1944 } 1945 1946 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 1947 OpenMPProcBindClauseKind ProcBind, 1948 SourceLocation Loc) { 1949 if (!CGF.HaveInsertPoint()) 1950 return; 1951 // Constants for proc bind value accepted by the runtime. 
1952 enum ProcBindTy { 1953 ProcBindFalse = 0, 1954 ProcBindTrue, 1955 ProcBindMaster, 1956 ProcBindClose, 1957 ProcBindSpread, 1958 ProcBindIntel, 1959 ProcBindDefault 1960 } RuntimeProcBind; 1961 switch (ProcBind) { 1962 case OMPC_PROC_BIND_master: 1963 RuntimeProcBind = ProcBindMaster; 1964 break; 1965 case OMPC_PROC_BIND_close: 1966 RuntimeProcBind = ProcBindClose; 1967 break; 1968 case OMPC_PROC_BIND_spread: 1969 RuntimeProcBind = ProcBindSpread; 1970 break; 1971 case OMPC_PROC_BIND_unknown: 1972 llvm_unreachable("Unsupported proc_bind value."); 1973 } 1974 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 1975 llvm::Value *Args[] = { 1976 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1977 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 1978 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 1979 } 1980 1981 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 1982 SourceLocation Loc) { 1983 if (!CGF.HaveInsertPoint()) 1984 return; 1985 // Build call void __kmpc_flush(ident_t *loc) 1986 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 1987 emitUpdateLocation(CGF, Loc)); 1988 } 1989 1990 namespace { 1991 /// \brief Indexes of fields for type kmp_task_t. 1992 enum KmpTaskTFields { 1993 /// \brief List of shared variables. 1994 KmpTaskTShareds, 1995 /// \brief Task routine. 1996 KmpTaskTRoutine, 1997 /// \brief Partition id for the untied tasks. 1998 KmpTaskTPartId, 1999 /// \brief Function with call of destructors for private variables. 2000 KmpTaskTDestructors, 2001 }; 2002 } // anonymous namespace 2003 2004 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2005 // FIXME: Add other entries type when they become supported. 2006 return OffloadEntriesTargetRegion.empty(); 2007 } 2008 2009 /// \brief Initialize target region entry. 
2010 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2011 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2012 StringRef ParentName, unsigned LineNum, 2013 unsigned ColNum, unsigned Order) { 2014 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2015 "only required for the device " 2016 "code generation."); 2017 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] = 2018 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr); 2019 ++OffloadingEntriesNum; 2020 } 2021 2022 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2023 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2024 StringRef ParentName, unsigned LineNum, 2025 unsigned ColNum, llvm::Constant *Addr, 2026 llvm::Constant *ID) { 2027 // If we are emitting code for a target, the entry is already initialized, 2028 // only has to be registered. 2029 if (CGM.getLangOpts().OpenMPIsDevice) { 2030 assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 2031 ColNum) && 2032 "Entry must exist."); 2033 auto &Entry = OffloadEntriesTargetRegion[DeviceID][FileID][ParentName] 2034 [LineNum][ColNum]; 2035 assert(Entry.isValid() && "Entry not initialized!"); 2036 Entry.setAddress(Addr); 2037 Entry.setID(ID); 2038 return; 2039 } else { 2040 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID); 2041 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] = 2042 Entry; 2043 } 2044 } 2045 2046 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 2047 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 2048 unsigned ColNum) const { 2049 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 2050 if (PerDevice == OffloadEntriesTargetRegion.end()) 2051 return false; 2052 auto PerFile = PerDevice->second.find(FileID); 2053 if (PerFile == PerDevice->second.end()) 2054 return false; 2055 auto PerParentName = 
PerFile->second.find(ParentName); 2056 if (PerParentName == PerFile->second.end()) 2057 return false; 2058 auto PerLine = PerParentName->second.find(LineNum); 2059 if (PerLine == PerParentName->second.end()) 2060 return false; 2061 auto PerColumn = PerLine->second.find(ColNum); 2062 if (PerColumn == PerLine->second.end()) 2063 return false; 2064 // Fail if this entry is already registered. 2065 if (PerColumn->second.getAddress() || PerColumn->second.getID()) 2066 return false; 2067 return true; 2068 } 2069 2070 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 2071 const OffloadTargetRegionEntryInfoActTy &Action) { 2072 // Scan all target region entries and perform the provided action. 2073 for (auto &D : OffloadEntriesTargetRegion) 2074 for (auto &F : D.second) 2075 for (auto &P : F.second) 2076 for (auto &L : P.second) 2077 for (auto &C : L.second) 2078 Action(D.first, F.first, P.first(), L.first, C.first, C.second); 2079 } 2080 2081 /// \brief Create a Ctor/Dtor-like function whose body is emitted through 2082 /// \a Codegen. This is used to emit the two functions that register and 2083 /// unregister the descriptor of the current compilation unit. 
2084 static llvm::Function * 2085 createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name, 2086 const RegionCodeGenTy &Codegen) { 2087 auto &C = CGM.getContext(); 2088 FunctionArgList Args; 2089 ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(), 2090 /*Id=*/nullptr, C.VoidPtrTy); 2091 Args.push_back(&DummyPtr); 2092 2093 CodeGenFunction CGF(CGM); 2094 GlobalDecl(); 2095 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( 2096 C.VoidTy, Args, FunctionType::ExtInfo(), 2097 /*isVariadic=*/false); 2098 auto FTy = CGM.getTypes().GetFunctionType(FI); 2099 auto *Fn = 2100 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation()); 2101 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation()); 2102 Codegen(CGF); 2103 CGF.FinishFunction(); 2104 return Fn; 2105 } 2106 2107 llvm::Function * 2108 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { 2109 2110 // If we don't have entries or if we are emitting code for the device, we 2111 // don't need to do anything. 2112 if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) 2113 return nullptr; 2114 2115 auto &M = CGM.getModule(); 2116 auto &C = CGM.getContext(); 2117 2118 // Get list of devices we care about 2119 auto &Devices = CGM.getLangOpts().OMPTargetTriples; 2120 2121 // We should be creating an offloading descriptor only if there are devices 2122 // specified. 2123 assert(!Devices.empty() && "No OpenMP offloading devices??"); 2124 2125 // Create the external variables that will point to the begin and end of the 2126 // host entries section. These will be defined by the linker. 
2127 auto *OffloadEntryTy = 2128 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); 2129 llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable( 2130 M, OffloadEntryTy, /*isConstant=*/true, 2131 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/0, 2132 ".omp_offloading.entries_begin"); 2133 llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable( 2134 M, OffloadEntryTy, /*isConstant=*/true, 2135 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/0, 2136 ".omp_offloading.entries_end"); 2137 2138 // Create all device images 2139 llvm::SmallVector<llvm::Constant *, 4> DeviceImagesEntires; 2140 auto *DeviceImageTy = cast<llvm::StructType>( 2141 CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); 2142 2143 for (unsigned i = 0; i < Devices.size(); ++i) { 2144 StringRef T = Devices[i].getTriple(); 2145 auto *ImgBegin = new llvm::GlobalVariable( 2146 M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, 2147 /*Initializer=*/0, Twine(".omp_offloading.img_start.") + Twine(T)); 2148 auto *ImgEnd = new llvm::GlobalVariable( 2149 M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, 2150 /*Initializer=*/0, Twine(".omp_offloading.img_end.") + Twine(T)); 2151 2152 llvm::Constant *Dev = 2153 llvm::ConstantStruct::get(DeviceImageTy, ImgBegin, ImgEnd, 2154 HostEntriesBegin, HostEntriesEnd, nullptr); 2155 DeviceImagesEntires.push_back(Dev); 2156 } 2157 2158 // Create device images global array. 
2159 llvm::ArrayType *DeviceImagesInitTy = 2160 llvm::ArrayType::get(DeviceImageTy, DeviceImagesEntires.size()); 2161 llvm::Constant *DeviceImagesInit = 2162 llvm::ConstantArray::get(DeviceImagesInitTy, DeviceImagesEntires); 2163 2164 llvm::GlobalVariable *DeviceImages = new llvm::GlobalVariable( 2165 M, DeviceImagesInitTy, /*isConstant=*/true, 2166 llvm::GlobalValue::InternalLinkage, DeviceImagesInit, 2167 ".omp_offloading.device_images"); 2168 DeviceImages->setUnnamedAddr(true); 2169 2170 // This is a Zero array to be used in the creation of the constant expressions 2171 llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), 2172 llvm::Constant::getNullValue(CGM.Int32Ty)}; 2173 2174 // Create the target region descriptor. 2175 auto *BinaryDescriptorTy = cast<llvm::StructType>( 2176 CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy())); 2177 llvm::Constant *TargetRegionsDescriptorInit = llvm::ConstantStruct::get( 2178 BinaryDescriptorTy, llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()), 2179 llvm::ConstantExpr::getGetElementPtr(DeviceImagesInitTy, DeviceImages, 2180 Index), 2181 HostEntriesBegin, HostEntriesEnd, nullptr); 2182 2183 auto *Desc = new llvm::GlobalVariable( 2184 M, BinaryDescriptorTy, /*isConstant=*/true, 2185 llvm::GlobalValue::InternalLinkage, TargetRegionsDescriptorInit, 2186 ".omp_offloading.descriptor"); 2187 2188 // Emit code to register or unregister the descriptor at execution 2189 // startup or closing, respectively. 2190 2191 // Create a variable to drive the registration and unregistration of the 2192 // descriptor, so we can reuse the logic that emits Ctors and Dtors. 
2193 auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var"); 2194 ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(), 2195 IdentInfo, C.CharTy); 2196 2197 auto *UnRegFn = createOffloadingBinaryDescriptorFunction( 2198 CGM, ".omp_offloading.descriptor_unreg", [&](CodeGenFunction &CGF) { 2199 CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib), 2200 Desc); 2201 }); 2202 auto *RegFn = createOffloadingBinaryDescriptorFunction( 2203 CGM, ".omp_offloading.descriptor_reg", [&](CodeGenFunction &CGF) { 2204 CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib), 2205 Desc); 2206 CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); 2207 }); 2208 return RegFn; 2209 } 2210 2211 void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *Addr, StringRef Name, 2212 uint64_t Size) { 2213 auto *TgtOffloadEntryType = cast<llvm::StructType>( 2214 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy())); 2215 llvm::LLVMContext &C = CGM.getModule().getContext(); 2216 llvm::Module &M = CGM.getModule(); 2217 2218 // Make sure the address has the right type. 2219 llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(Addr, CGM.VoidPtrTy); 2220 2221 // Create constant string with the name. 2222 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 2223 2224 llvm::GlobalVariable *Str = 2225 new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true, 2226 llvm::GlobalValue::InternalLinkage, StrPtrInit, 2227 ".omp_offloading.entry_name"); 2228 Str->setUnnamedAddr(true); 2229 llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy); 2230 2231 // Create the entry struct. 
2232 llvm::Constant *EntryInit = llvm::ConstantStruct::get( 2233 TgtOffloadEntryType, AddrPtr, StrPtr, 2234 llvm::ConstantInt::get(CGM.SizeTy, Size), nullptr); 2235 llvm::GlobalVariable *Entry = new llvm::GlobalVariable( 2236 M, TgtOffloadEntryType, true, llvm::GlobalValue::ExternalLinkage, 2237 EntryInit, ".omp_offloading.entry"); 2238 2239 // The entry has to be created in the section the linker expects it to be. 2240 Entry->setSection(".omp_offloading.entries"); 2241 // We can't have any padding between symbols, so we need to have 1-byte 2242 // alignment. 2243 Entry->setAlignment(1); 2244 return; 2245 } 2246 2247 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 2248 // Emit the offloading entries and metadata so that the device codegen side 2249 // can 2250 // easily figure out what to emit. The produced metadata looks like this: 2251 // 2252 // !omp_offload.info = !{!1, ...} 2253 // 2254 // Right now we only generate metadata for function that contain target 2255 // regions. 2256 2257 // If we do not have entries, we dont need to do anything. 2258 if (OffloadEntriesInfoManager.empty()) 2259 return; 2260 2261 llvm::Module &M = CGM.getModule(); 2262 llvm::LLVMContext &C = M.getContext(); 2263 SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> 2264 OrderedEntries(OffloadEntriesInfoManager.size()); 2265 2266 // Create the offloading info metadata node. 2267 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 2268 2269 // Auxiliar methods to create metadata values and strings. 
2270 auto getMDInt = [&](unsigned v) { 2271 return llvm::ConstantAsMetadata::get( 2272 llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v)); 2273 }; 2274 2275 auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); }; 2276 2277 // Create function that emits metadata for each target region entry; 2278 auto &&TargetRegionMetadataEmitter = [&]( 2279 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line, 2280 unsigned Column, 2281 OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 2282 llvm::SmallVector<llvm::Metadata *, 32> Ops; 2283 // Generate metadata for target regions. Each entry of this metadata 2284 // contains: 2285 // - Entry 0 -> Kind of this type of metadata (0). 2286 // - Entry 1 -> Device ID of the file where the entry was identified. 2287 // - Entry 2 -> File ID of the file where the entry was identified. 2288 // - Entry 3 -> Mangled name of the function where the entry was identified. 2289 // - Entry 4 -> Line in the file where the entry was identified. 2290 // - Entry 5 -> Column in the file where the entry was identified. 2291 // - Entry 6 -> Order the entry was created. 2292 // The first element of the metadata node is the kind. 2293 Ops.push_back(getMDInt(E.getKind())); 2294 Ops.push_back(getMDInt(DeviceID)); 2295 Ops.push_back(getMDInt(FileID)); 2296 Ops.push_back(getMDString(ParentName)); 2297 Ops.push_back(getMDInt(Line)); 2298 Ops.push_back(getMDInt(Column)); 2299 Ops.push_back(getMDInt(E.getOrder())); 2300 2301 // Save this entry in the right position of the ordered entries array. 2302 OrderedEntries[E.getOrder()] = &E; 2303 2304 // Add metadata to the named metadata node. 
2305 MD->addOperand(llvm::MDNode::get(C, Ops)); 2306 }; 2307 2308 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 2309 TargetRegionMetadataEmitter); 2310 2311 for (auto *E : OrderedEntries) { 2312 assert(E && "All ordered entries must exist!"); 2313 if (auto *CE = 2314 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 2315 E)) { 2316 assert(CE->getID() && CE->getAddress() && 2317 "Entry ID and Addr are invalid!"); 2318 createOffloadEntry(CE->getID(), CE->getAddress()->getName(), /*Size=*/0); 2319 } else 2320 llvm_unreachable("Unsupported entry kind."); 2321 } 2322 } 2323 2324 /// \brief Loads all the offload entries information from the host IR 2325 /// metadata. 2326 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 2327 // If we are in target mode, load the metadata from the host IR. This code has 2328 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 2329 2330 if (!CGM.getLangOpts().OpenMPIsDevice) 2331 return; 2332 2333 if (CGM.getLangOpts().OMPHostIRFile.empty()) 2334 return; 2335 2336 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 2337 if (Buf.getError()) 2338 return; 2339 2340 llvm::LLVMContext C; 2341 auto ME = llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C); 2342 2343 if (ME.getError()) 2344 return; 2345 2346 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 2347 if (!MD) 2348 return; 2349 2350 for (auto I : MD->operands()) { 2351 llvm::MDNode *MN = cast<llvm::MDNode>(I); 2352 2353 auto getMDInt = [&](unsigned Idx) { 2354 llvm::ConstantAsMetadata *V = 2355 cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 2356 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 2357 }; 2358 2359 auto getMDString = [&](unsigned Idx) { 2360 llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx)); 2361 return V->getString(); 2362 }; 2363 2364 switch (getMDInt(0)) { 2365 default: 2366 llvm_unreachable("Unexpected metadata!"); 2367 break; 2368 case 
OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 2369 OFFLOAD_ENTRY_INFO_TARGET_REGION: 2370 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 2371 /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2), 2372 /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4), 2373 /*Column=*/getMDInt(5), /*Order=*/getMDInt(6)); 2374 break; 2375 } 2376 } 2377 } 2378 2379 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 2380 if (!KmpRoutineEntryPtrTy) { 2381 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 2382 auto &C = CGM.getContext(); 2383 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 2384 FunctionProtoType::ExtProtoInfo EPI; 2385 KmpRoutineEntryPtrQTy = C.getPointerType( 2386 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 2387 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 2388 } 2389 } 2390 2391 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 2392 QualType FieldTy) { 2393 auto *Field = FieldDecl::Create( 2394 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 2395 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 2396 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 2397 Field->setAccess(AS_public); 2398 DC->addDecl(Field); 2399 return Field; 2400 } 2401 2402 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 2403 2404 // Make sure the type of the entry is already created. This is the type we 2405 // have to create: 2406 // struct __tgt_offload_entry{ 2407 // void *addr; // Pointer to the offload entry info. 2408 // // (function or global) 2409 // char *name; // Name of the function or global. 2410 // size_t size; // Size of the entry info (0 if it a function). 
2411 // }; 2412 if (TgtOffloadEntryQTy.isNull()) { 2413 ASTContext &C = CGM.getContext(); 2414 auto *RD = C.buildImplicitRecord("__tgt_offload_entry"); 2415 RD->startDefinition(); 2416 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 2417 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 2418 addFieldToRecordDecl(C, RD, C.getSizeType()); 2419 RD->completeDefinition(); 2420 TgtOffloadEntryQTy = C.getRecordType(RD); 2421 } 2422 return TgtOffloadEntryQTy; 2423 } 2424 2425 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { 2426 // These are the types we need to build: 2427 // struct __tgt_device_image{ 2428 // void *ImageStart; // Pointer to the target code start. 2429 // void *ImageEnd; // Pointer to the target code end. 2430 // // We also add the host entries to the device image, as it may be useful 2431 // // for the target runtime to have access to that information. 2432 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all 2433 // // the entries. 2434 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 2435 // // entries (non inclusive). 2436 // }; 2437 if (TgtDeviceImageQTy.isNull()) { 2438 ASTContext &C = CGM.getContext(); 2439 auto *RD = C.buildImplicitRecord("__tgt_device_image"); 2440 RD->startDefinition(); 2441 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 2442 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 2443 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 2444 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 2445 RD->completeDefinition(); 2446 TgtDeviceImageQTy = C.getRecordType(RD); 2447 } 2448 return TgtDeviceImageQTy; 2449 } 2450 2451 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { 2452 // struct __tgt_bin_desc{ 2453 // int32_t NumDevices; // Number of devices supported. 2454 // __tgt_device_image *DeviceImages; // Arrays of device images 2455 // // (one per device). 2456 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the 2457 // // entries. 
//   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
  //                                         // entries (non inclusive).
  // };
  // The record is built once and cached in TgtBinaryDescriptorQTy.
  if (TgtBinaryDescriptorQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
    RD->startDefinition();
    // NumDevices (signed 32-bit integer).
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    // DeviceImages.
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
    // EntriesBegin and EntriesEnd.
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtBinaryDescriptorQTy = C.getRecordType(RD);
  }
  return TgtBinaryDescriptorQTy;
}

namespace {
/// \brief The three declarations that describe one privatized variable of a
/// task: the original variable, its private copy, and (for firstprivates) the
/// helper variable used when initializing the copy. PrivateElemInit is null
/// for plain privates (see emitTaskCall).
struct PrivateHelpersTy {
  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
                   const VarDecl *PrivateElemInit)
      : Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  const VarDecl *Original;
  const VarDecl *PrivateCopy;
  const VarDecl *PrivateElemInit;
};
/// \brief A private variable description paired with the alignment used as its
/// sort key.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

/// \brief Build the implicit record '.kmp_privates.t' holding one field per
/// entry of \a Privates, in the order given.
///
/// Each field has the non-reference type of the original variable and receives
/// that variable's AlignedAttr attributes.
/// \return The completed record, or nullptr when \a Privates is empty.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    auto &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    auto *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (auto &&Pair : Privates) {
      auto *VD = Pair.second.Original;
      auto Type = VD->getType();
      // The field stores the value itself, never a reference.
      Type = Type.getNonReferenceType();
      auto *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        // Carry over explicit alignment attributes from the original variable.
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

/// \brief Build the implicit record 'kmp_task_t' with the four fields listed
/// in the comment below, in that order.
/// \param KmpInt32Ty Type used for the part_id field.
/// \param KmpRoutineEntryPointerQTy Type used for the routine and destructors
/// fields.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  auto &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_routine_entry_t destructors;
  //       };
  auto *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  RD->completeDefinition();
  return RD;
}

/// \brief Build the implicit record 'kmp_task_t_with_privates' for one task.
///
/// The first field has type \a KmpTaskTQTy. A second field holding the
/// privates record is added only when \a Privates is non-empty, so users of
/// this record must check for its presence.
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  auto &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  }
  RD->completeDefinition();
  return RD;
}

/// \brief Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
///   tt->shareds);
///   return 0;
/// }
/// \endcode
/// \return The emitted internal-linkage function '.omp_task_entry.'.
static llvm::Value *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Value *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  auto &C = CGM.getContext();
  // Declare the two parameters: the thread id and a restrict-qualified pointer
  // to the task record.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
                                /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict());
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  FunctionType::ExtInfo Info;
  auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
                                                    /*isVariadic=*/false);
  auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  auto *TaskEntry =
      llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_entry.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
  // The body is emitted with its own CodeGenFunction, with debug info
  // disabled.
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt->task_data.shareds);
  auto *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase designates the whole kmp_task_t_with_privates object; Base is its
  // first field (task_data).
  LValue TDBase = emitLoadOfPointerLValue(
      CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
  auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Load the field selected by index KmpTaskTPartId.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();

  // Load the field selected by index KmpTaskTShareds and cast it to the
  // shareds pointer type expected by the task function.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field is optional: pass its address as void*, or a null
  // pointer when the record has no second field.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
                             TaskPrivatesMap, SharedsParam};
  CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
  // return 0;
  CGF.EmitStoreThroughLValue(
      RValue::get(CGF.Builder.getInt32(/*C=*/0)),
      CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// \brief Emit the internal function '.omp_task_destructor.', which takes
/// (kmp_int32 gtid, kmp_task_t_with_privates *tt) and pushes a destroy cleanup
/// for every field of tt's privates record whose type needs destruction.
/// The privates record is taken to be the second field of the task record.
static
llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                     SourceLocation Loc,
                                     QualType KmpInt32Ty,
                                     QualType KmpTaskTWithPrivatesPtrQTy,
                                     QualType KmpTaskTWithPrivatesQTy) {
  auto &C = CGM.getContext();
  // Same signature as the task entry: (kmp_int32 gtid, task record pointer).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
                                /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict());
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  FunctionType::ExtInfo Info;
  auto &DestructorFnInfo =
      CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
                                                    /*isVariadic=*/false);
  auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_destructor.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
                                    DestructorFnInfo);
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args);

  LValue Base = emitLoadOfPointerLValue(
      CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
  auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Step to the second field of the task record (the privates record). This
  // dereferences the iterator unconditionally, so the field must exist.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Register a destroy cleanup for each private whose type is destructed.
  for (auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (auto DtorKind = Field->getType().isDestructedType()) {
      auto FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// \brief Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
/// \param PrivateVars Private variables; they get the first output parameters.
/// \param FirstprivateVars Firstprivate variables; their output parameters
/// follow those of \a PrivateVars.
/// \param PrivatesQTy Record type whose fields hold the private copies.
/// \param Privates Descriptions of the privates, indexed in the same order as
/// the fields of \a PrivatesQTy.
/// \return The emitted internal-linkage, always-inline function.
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               ArrayRef<const Expr *> PrivateVars,
                               ArrayRef<const Expr *> FirstprivateVars,
                               QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  // Parameter 0: pointer to the privates record.
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict());
  Args.push_back(&TaskPrivatesArg);
  // Maps each original variable to the index of its output parameter in Args.
  // Index 0 is taken by the privates record, so counting starts at 1.
  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (auto *E: PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc,
        /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
                            .withConst()
                            .withRestrict()));
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (auto *E : FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc,
        /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
                            .withConst()
                            .withRestrict()));
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  FunctionType::ExtInfo Info;
  auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info,
                                                    /*isVariadic=*/false);
  auto *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
      ".omp_task_privates_map.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args);

  // *privi = &.privates.privi;
  LValue Base = emitLoadOfPointerLValue(
      CGF, CGF.GetAddrOfLocalVar(&TaskPrivatesArg), TaskPrivatesArg.getType());
  auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  // Counter now indexes Privates in step with the fields of the record.
  Counter = 0;
  for (auto *Field : PrivatesQTyRD->fields()) {
    auto FieldLVal = CGF.EmitLValueForField(Base, Field);
    // Find the output parameter that belongs to this field's original variable.
    auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    auto RefLoadLVal =
        emitLoadOfPointerLValue(CGF, RefLVal.getAddress(), RefLVal.getType());
    // Store the field's address through the output parameter.
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// \brief Comparator passed to llvm::array_pod_sort for the task privates.
/// Compares only the alignment (the first member of the pair) and yields 1
/// when P1's alignment is smaller than P2's, -1 when it is larger and 0 when
/// they are equal. Assuming qsort-style comparator semantics, that puts larger
/// alignments first.
static int array_pod_sort_comparator(const PrivateDataTy *P1,
                                     const PrivateDataTy *P2) {
  return P1->first < P2->first ? 1 : (P2->first < P1->first ?
-1 : 0);
}

/// \brief Emit the code that allocates, initializes and launches one task.
///
/// The emitted sequence is: build the per-task record type, emit the privates
/// mapping and proxy entry functions, call __kmpc_omp_task_alloc, copy the
/// shareds, initialize the private copies, store the destructors function
/// pointer, fill the dependence array, and finally emit the launch code.
/// Without \a IfCond only the "then" path (__kmpc_omp_task or
/// __kmpc_omp_task_with_deps) is emitted; with it, both paths are handed to
/// emitOMPIfClause, the "else" path running the task entry directly between
/// __kmpc_omp_task_begin_if0 and __kmpc_omp_task_complete_if0.
///
/// \param D Directive whose associated captured statement is used to look up
/// the captured fields of firstprivate variables.
/// \param Tied Sets the tied flag in the task flags.
/// \param Final Either a runtime value (pointer part) or a compile-time
/// boolean (int part) selecting the final flag.
/// \param TaskFunction Outlined task function; its fourth parameter gives the
/// type of the privates mapping function.
/// \param SharedsTy Record type of the shared variables block.
/// \param Shareds Address of the shared variables block to copy from.
/// \param IfCond Condition of the 'if' clause, or null.
/// \param PrivateVars, PrivateCopies Parallel lists of private variables and
/// their copies.
/// \param FirstprivateVars, FirstprivateCopies, FirstprivateInits Parallel
/// lists of firstprivate variables, their copies and init helper variables.
/// \param Dependences Dependence kind and expression for each 'depend' item.
void CGOpenMPRuntime::emitTaskCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
    bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
    llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
    ArrayRef<const Expr *> PrivateCopies,
    ArrayRef<const Expr *> FirstprivateVars,
    ArrayRef<const Expr *> FirstprivateCopies,
    ArrayRef<const Expr *> FirstprivateInits,
    ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) {
  if (!CGF.HaveInsertPoint())
    return;
  auto &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 8> Privates;
  // Aggregate privates and sort them by the alignment.
  auto I = PrivateCopies.begin();
  for (auto *E : PrivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr)));
    ++I;
  }
  I = FirstprivateCopies.begin();
  auto IElemInitRef = FirstprivateInits.begin();
  for (auto *E : FirstprivateVars) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.push_back(std::make_pair(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
    ++I, ++IElemInitRef;
  }
  llvm::array_pod_sort(Privates.begin(), Privates.end(),
                       array_pod_sort_comparator);
  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (KmpTaskTQTy.isNull()) {
    KmpTaskTQTy = C.getRecordType(
        createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy));
  }
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  auto *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
  auto *KmpTaskTWithPrivatesTySize = getTypeSize(CGF, KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit the privates mapping function (if there are any privates), cast to
  // the type of the task function's fourth parameter; otherwise pass a null
  // pointer of that type.
  llvm::Value *TaskPrivatesMap = nullptr;
  auto *TaskPrivatesMapTy =
      std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
                3)
          ->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  auto *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy,
      KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  const unsigned TiedFlag = 0x1;
  const unsigned FinalFlag = 0x2;
  unsigned Flags = Tied ? TiedFlag : 0;
  // The final flag is selected at run time when Final carries a value, and
  // folded to a constant otherwise.
  auto *TaskFlags =
      Final.getPointer()
          ? CGF.Builder.CreateSelect(Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  auto *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  // View the allocated task as kmp_task_t_with_privates. Base designates the
  // whole record, TDBase its first field (the kmp_task_t part).
  auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
  }
  // Emit initial values for private copies (if any).
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    auto PrivatesBase = CGF.EmitLValueForField(Base, *FI);
    // From here on FI walks the fields of the privates record, in step with
    // the (sorted) Privates list.
    FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
    LValue SharedsBase;
    if (!FirstprivateVars.empty()) {
      // Firstprivates are initialized from the shareds copy stored in the
      // task.
      SharedsBase = CGF.MakeAddrLValue(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
          SharedsTy);
    }
    CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
        cast<CapturedStmt>(*D.getAssociatedStmt()));
    for (auto &&Pair : Privates) {
      auto *VD = Pair.second.PrivateCopy;
      auto *Init = VD->getAnyInitializer();
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (Init) {
        // A non-null PrivateElemInit marks a firstprivate variable.
        if (auto *Elem = Pair.second.PrivateElemInit) {
          auto *OriginalVD = Pair.second.Original;
          auto *SharedField = CapturesInfo.lookup(OriginalVD);
          auto SharedRefLValue =
              CGF.EmitLValueForField(SharedsBase, SharedField);
          // Re-create the lvalue with the declared alignment of the original
          // variable.
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), AlignmentSource::Decl);
          QualType Type = OriginalVD->getType();
          if (Type->isArrayType()) {
            // Initialize firstprivate array.
            if (!isa<CXXConstructExpr>(Init) ||
                CGF.isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
                                      SharedRefLValue.getAddress(), Type);
            } else {
              // Initialize firstprivate array using element-by-element
              // initialization.
              CGF.EmitOMPAggregateAssign(
                  PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
                  Type, [&CGF, Elem, Init, &CapturesInfo](
                            Address DestElement, Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    CodeGenFunction::OMPPrivateScope InitScope(CGF);
                    InitScope.addPrivate(Elem, [SrcElement]() -> Address {
                      return SrcElement;
                    });
                    (void)InitScope.Privatize();
                    // Emit initialization for single element.
                    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                        CGF, &CapturesInfo);
                    CGF.EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer=*/false);
                  });
            }
          } else {
            // Non-array firstprivate: evaluate the initializer with Elem
            // mapped to the shared original.
            CodeGenFunction::OMPPrivateScope InitScope(CGF);
            InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
              return SharedRefLValue.getAddress();
            });
            (void)InitScope.Privatize();
            CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
            CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                               /*capturedByInit=*/false);
          }
        } else {
          // Plain private with an initializer.
          CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
        }
      }
      NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
      ++FI;
    }
  }
  // Provide pointer to function with destructors for privates.
  llvm::Value *DestructorFn =
      NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
                                             KmpTaskTWithPrivatesPtrQTy,
                                             KmpTaskTWithPrivatesQTy)
                   : llvm::ConstantPointerNull::get(
                         cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
  LValue Destructor = CGF.EmitLValueForField(
      TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
  CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                            DestructorFn, KmpRoutineEntryPtrTy),
                        Destructor);

  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
    // Field indices within kmp_depend_info.
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    // The flags field is an unsigned integer as wide as 'bool'.
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Build kmp_depend_info { intptr base_addr; size_t len; flags } once and
    // cache it in KmpDependInfoTy.
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy);
    for (unsigned i = 0; i < NumDependencies; ++i) {
      const Expr *E = Dependences[i].second;
      auto Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: the size is the distance in bytes from the section's
        // start address to one element past its upper-bound element.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else
        Size = getTypeSize(CGF, Ty);
      auto Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      auto BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      auto LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Dependences[i].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      auto FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // The runtime calls take the array as a void pointer to its first element.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
        CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
  // *new_task);
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  auto *ThreadID = getThreadID(CGF, Loc);
  auto *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Filled only when there are dependences; no noalias dependences are passed.
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // "Then" path: hand the task to the runtime.
  auto &&ThenCodeGen = [this, NumDependencies,
                        &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) {
    // TODO: add check for untied tasks.
    if (NumDependencies) {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps),
                          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
  };
  // Cleanup type sized to the number of elements in TaskArgs.
  typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value>
      IfCallEndCleanup;

  // Filled only when there are dependences.
  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // "Else" path: wait for dependences, then run the task entry in place.
  auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0),
                        TaskArgs);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Pushed as a normal-and-EH cleanup rather than emitted inline.
    CGF.EHStack.pushCleanup<IfCallEndCleanup>(
        NormalAndEHCleanup,
        createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
        llvm::makeArrayRef(TaskArgs));

    // Call proxy_task_entry(gtid, new_task);
    llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
    CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
  };

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    ThenCodeGen(CGF);
  }
}

/// \brief Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Forwarded unchanged to \a RedOpGen for every
/// element.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element reduction.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  auto RHSBegin = RHSAddr.getPointer();
  auto LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for an empty array.
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current RHS and LHS element pointers around the loop.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the reduction operation with LHSVar and RHSVar remapped to the
  // current elements.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back edge comes from whatever block the reduction code ended in.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
                                          llvm::Type *ArgsType,
                                          ArrayRef<const Expr *> Privates,
                                          ArrayRef<const Expr *> LHSExprs,
                                          ArrayRef<const Expr *> RHSExprs,
                                          ArrayRef<const Expr *> ReductionOps) {
  auto &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  FunctionType::ExtInfo EI;
  auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
      C.VoidTy, Args, EI, /*isVariadic=*/false);
  auto *Fn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
      ".omp.reduction.reduction_func", &CGM.getModule());
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); 3235 CodeGenFunction CGF(CGM); 3236 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 3237 3238 // Dst = (void*[n])(LHSArg); 3239 // Src = (void*[n])(RHSArg); 3240 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3241 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 3242 ArgsType), CGF.getPointerAlign()); 3243 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3244 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 3245 ArgsType), CGF.getPointerAlign()); 3246 3247 // ... 3248 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 3249 // ... 3250 CodeGenFunction::OMPPrivateScope Scope(CGF); 3251 auto IPriv = Privates.begin(); 3252 unsigned Idx = 0; 3253 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 3254 auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 3255 Scope.addPrivate(RHSVar, [&]() -> Address { 3256 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 3257 }); 3258 auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 3259 Scope.addPrivate(LHSVar, [&]() -> Address { 3260 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 3261 }); 3262 QualType PrivTy = (*IPriv)->getType(); 3263 if (PrivTy->isArrayType()) { 3264 // Get array size and emit VLA type. 
3265 ++Idx; 3266 Address Elem = 3267 CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize()); 3268 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 3269 CodeGenFunction::OpaqueValueMapping OpaqueMap( 3270 CGF, 3271 cast<OpaqueValueExpr>( 3272 CGF.getContext().getAsVariableArrayType(PrivTy)->getSizeExpr()), 3273 RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 3274 CGF.EmitVariablyModifiedType(PrivTy); 3275 } 3276 } 3277 Scope.Privatize(); 3278 IPriv = Privates.begin(); 3279 auto ILHS = LHSExprs.begin(); 3280 auto IRHS = RHSExprs.begin(); 3281 for (auto *E : ReductionOps) { 3282 if ((*IPriv)->getType()->isArrayType()) { 3283 // Emit reduction for array section. 3284 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 3285 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 3286 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 3287 [=](CodeGenFunction &CGF, const Expr *, 3288 const Expr *, 3289 const Expr *) { CGF.EmitIgnoredExpr(E); }); 3290 } else 3291 // Emit reduction for array subscript or single variable. 3292 CGF.EmitIgnoredExpr(E); 3293 ++IPriv, ++ILHS, ++IRHS; 3294 } 3295 Scope.ForceCleanup(); 3296 CGF.FinishFunction(); 3297 return Fn; 3298 } 3299 3300 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 3301 ArrayRef<const Expr *> Privates, 3302 ArrayRef<const Expr *> LHSExprs, 3303 ArrayRef<const Expr *> RHSExprs, 3304 ArrayRef<const Expr *> ReductionOps, 3305 bool WithNowait, bool SimpleReduction) { 3306 if (!CGF.HaveInsertPoint()) 3307 return; 3308 // Next code should be emitted for reduction: 3309 // 3310 // static kmp_critical_name lock = { 0 }; 3311 // 3312 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 3313 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 3314 // ... 3315 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 3316 // *(Type<n>-1*)rhs[<n>-1]); 3317 // } 3318 // 3319 // ... 
3320 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 3321 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 3322 // RedList, reduce_func, &<lock>)) { 3323 // case 1: 3324 // ... 3325 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 3326 // ... 3327 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 3328 // break; 3329 // case 2: 3330 // ... 3331 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 3332 // ... 3333 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 3334 // break; 3335 // default:; 3336 // } 3337 // 3338 // if SimpleReduction is true, only the next code is generated: 3339 // ... 3340 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 3341 // ... 3342 3343 auto &C = CGM.getContext(); 3344 3345 if (SimpleReduction) { 3346 CodeGenFunction::RunCleanupsScope Scope(CGF); 3347 auto IPriv = Privates.begin(); 3348 auto ILHS = LHSExprs.begin(); 3349 auto IRHS = RHSExprs.begin(); 3350 for (auto *E : ReductionOps) { 3351 if ((*IPriv)->getType()->isArrayType()) { 3352 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 3353 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 3354 EmitOMPAggregateReduction( 3355 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 3356 [=](CodeGenFunction &CGF, const Expr *, const Expr *, 3357 const Expr *) { CGF.EmitIgnoredExpr(E); }); 3358 } else 3359 CGF.EmitIgnoredExpr(E); 3360 ++IPriv, ++ILHS, ++IRHS; 3361 } 3362 return; 3363 } 3364 3365 // 1. Build a list of reduction variables. 3366 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 3367 auto Size = RHSExprs.size(); 3368 for (auto *E : Privates) { 3369 if (E->getType()->isArrayType()) 3370 // Reserve place for array size. 
3371 ++Size; 3372 } 3373 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 3374 QualType ReductionArrayTy = 3375 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 3376 /*IndexTypeQuals=*/0); 3377 Address ReductionList = 3378 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 3379 auto IPriv = Privates.begin(); 3380 unsigned Idx = 0; 3381 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 3382 Address Elem = 3383 CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize()); 3384 CGF.Builder.CreateStore( 3385 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3386 CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), 3387 Elem); 3388 if ((*IPriv)->getType()->isArrayType()) { 3389 // Store array size. 3390 ++Idx; 3391 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, 3392 CGF.getPointerSize()); 3393 CGF.Builder.CreateStore( 3394 CGF.Builder.CreateIntToPtr( 3395 CGF.Builder.CreateIntCast( 3396 CGF.getVLASize(CGF.getContext().getAsVariableArrayType( 3397 (*IPriv)->getType())) 3398 .first, 3399 CGF.SizeTy, /*isSigned=*/false), 3400 CGF.VoidPtrTy), 3401 Elem); 3402 } 3403 } 3404 3405 // 2. Emit reduce_func(). 3406 auto *ReductionFn = emitReductionFunction( 3407 CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 3408 LHSExprs, RHSExprs, ReductionOps); 3409 3410 // 3. Create static kmp_critical_name lock = { 0 }; 3411 auto *Lock = getCriticalRegionLock(".reduction"); 3412 3413 // 4. 
Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 3414 // RedList, reduce_func, &<lock>); 3415 auto *IdentTLoc = emitUpdateLocation( 3416 CGF, Loc, 3417 static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE)); 3418 auto *ThreadId = getThreadID(CGF, Loc); 3419 auto *ReductionArrayTySize = getTypeSize(CGF, ReductionArrayTy); 3420 auto *RL = 3421 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(), 3422 CGF.VoidPtrTy); 3423 llvm::Value *Args[] = { 3424 IdentTLoc, // ident_t *<loc> 3425 ThreadId, // i32 <gtid> 3426 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 3427 ReductionArrayTySize, // size_type sizeof(RedList) 3428 RL, // void *RedList 3429 ReductionFn, // void (*) (void *, void *) <reduce_func> 3430 Lock // kmp_critical_name *&<lock> 3431 }; 3432 auto Res = CGF.EmitRuntimeCall( 3433 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 3434 : OMPRTL__kmpc_reduce), 3435 Args); 3436 3437 // 5. Build switch(res) 3438 auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 3439 auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 3440 3441 // 6. Build case 1: 3442 // ... 3443 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 3444 // ... 3445 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 3446 // break; 3447 auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 3448 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 3449 CGF.EmitBlock(Case1BB); 3450 3451 { 3452 CodeGenFunction::RunCleanupsScope Scope(CGF); 3453 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 3454 llvm::Value *EndArgs[] = { 3455 IdentTLoc, // ident_t *<loc> 3456 ThreadId, // i32 <gtid> 3457 Lock // kmp_critical_name *&<lock> 3458 }; 3459 CGF.EHStack 3460 .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>( 3461 NormalAndEHCleanup, 3462 createRuntimeFunction(WithNowait ? 
OMPRTL__kmpc_end_reduce_nowait 3463 : OMPRTL__kmpc_end_reduce), 3464 llvm::makeArrayRef(EndArgs)); 3465 auto IPriv = Privates.begin(); 3466 auto ILHS = LHSExprs.begin(); 3467 auto IRHS = RHSExprs.begin(); 3468 for (auto *E : ReductionOps) { 3469 if ((*IPriv)->getType()->isArrayType()) { 3470 // Emit reduction for array section. 3471 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 3472 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 3473 EmitOMPAggregateReduction( 3474 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 3475 [=](CodeGenFunction &CGF, const Expr *, const Expr *, 3476 const Expr *) { CGF.EmitIgnoredExpr(E); }); 3477 } else 3478 // Emit reduction for array subscript or single variable. 3479 CGF.EmitIgnoredExpr(E); 3480 ++IPriv, ++ILHS, ++IRHS; 3481 } 3482 } 3483 3484 CGF.EmitBranch(DefaultBB); 3485 3486 // 7. Build case 2: 3487 // ... 3488 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 3489 // ... 3490 // break; 3491 auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 3492 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 3493 CGF.EmitBlock(Case2BB); 3494 3495 { 3496 CodeGenFunction::RunCleanupsScope Scope(CGF); 3497 if (!WithNowait) { 3498 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 3499 llvm::Value *EndArgs[] = { 3500 IdentTLoc, // ident_t *<loc> 3501 ThreadId, // i32 <gtid> 3502 Lock // kmp_critical_name *&<lock> 3503 }; 3504 CGF.EHStack 3505 .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>( 3506 NormalAndEHCleanup, 3507 createRuntimeFunction(OMPRTL__kmpc_end_reduce), 3508 llvm::makeArrayRef(EndArgs)); 3509 } 3510 auto ILHS = LHSExprs.begin(); 3511 auto IRHS = RHSExprs.begin(); 3512 auto IPriv = Privates.begin(); 3513 for (auto *E : ReductionOps) { 3514 const Expr *XExpr = nullptr; 3515 const Expr *EExpr = nullptr; 3516 const Expr *UpExpr = nullptr; 3517 BinaryOperatorKind BO = BO_Comma; 3518 if (auto *BO = dyn_cast<BinaryOperator>(E)) { 3519 if 
(BO->getOpcode() == BO_Assign) { 3520 XExpr = BO->getLHS(); 3521 UpExpr = BO->getRHS(); 3522 } 3523 } 3524 // Try to emit update expression as a simple atomic. 3525 auto *RHSExpr = UpExpr; 3526 if (RHSExpr) { 3527 // Analyze RHS part of the whole expression. 3528 if (auto *ACO = dyn_cast<AbstractConditionalOperator>( 3529 RHSExpr->IgnoreParenImpCasts())) { 3530 // If this is a conditional operator, analyze its condition for 3531 // min/max reduction operator. 3532 RHSExpr = ACO->getCond(); 3533 } 3534 if (auto *BORHS = 3535 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 3536 EExpr = BORHS->getRHS(); 3537 BO = BORHS->getOpcode(); 3538 } 3539 } 3540 if (XExpr) { 3541 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 3542 auto &&AtomicRedGen = [this, BO, VD, IPriv, 3543 Loc](CodeGenFunction &CGF, const Expr *XExpr, 3544 const Expr *EExpr, const Expr *UpExpr) { 3545 LValue X = CGF.EmitLValue(XExpr); 3546 RValue E; 3547 if (EExpr) 3548 E = CGF.EmitAnyExpr(EExpr); 3549 CGF.EmitOMPAtomicSimpleUpdateExpr( 3550 X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc, 3551 [&CGF, UpExpr, VD, IPriv](RValue XRValue) { 3552 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 3553 PrivateScope.addPrivate(VD, [&CGF, VD, XRValue]() -> Address { 3554 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 3555 CGF.EmitStoreThroughLValue( 3556 XRValue, CGF.MakeAddrLValue(LHSTemp, VD->getType())); 3557 return LHSTemp; 3558 }); 3559 (void)PrivateScope.Privatize(); 3560 return CGF.EmitAnyExpr(UpExpr); 3561 }); 3562 }; 3563 if ((*IPriv)->getType()->isArrayType()) { 3564 // Emit atomic reduction for array section. 3565 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 3566 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 3567 AtomicRedGen, XExpr, EExpr, UpExpr); 3568 } else 3569 // Emit atomic reduction for array subscript or single variable. 
3570 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 3571 } else { 3572 // Emit as a critical region. 3573 auto &&CritRedGen = [this, E, Loc](CodeGenFunction &CGF, const Expr *, 3574 const Expr *, const Expr *) { 3575 emitCriticalRegion( 3576 CGF, ".atomic_reduction", 3577 [E](CodeGenFunction &CGF) { CGF.EmitIgnoredExpr(E); }, Loc); 3578 }; 3579 if ((*IPriv)->getType()->isArrayType()) { 3580 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 3581 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 3582 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 3583 CritRedGen); 3584 } else 3585 CritRedGen(CGF, nullptr, nullptr, nullptr); 3586 } 3587 ++ILHS, ++IRHS, ++IPriv; 3588 } 3589 } 3590 3591 CGF.EmitBranch(DefaultBB); 3592 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 3593 } 3594 3595 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 3596 SourceLocation Loc) { 3597 if (!CGF.HaveInsertPoint()) 3598 return; 3599 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 3600 // global_tid); 3601 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3602 // Ignore return result until untied tasks are supported. 
3603 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 3604 } 3605 3606 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 3607 OpenMPDirectiveKind InnerKind, 3608 const RegionCodeGenTy &CodeGen, 3609 bool HasCancel) { 3610 if (!CGF.HaveInsertPoint()) 3611 return; 3612 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 3613 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 3614 } 3615 3616 namespace { 3617 enum RTCancelKind { 3618 CancelNoreq = 0, 3619 CancelParallel = 1, 3620 CancelLoop = 2, 3621 CancelSections = 3, 3622 CancelTaskgroup = 4 3623 }; 3624 } 3625 3626 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 3627 RTCancelKind CancelKind = CancelNoreq; 3628 if (CancelRegion == OMPD_parallel) 3629 CancelKind = CancelParallel; 3630 else if (CancelRegion == OMPD_for) 3631 CancelKind = CancelLoop; 3632 else if (CancelRegion == OMPD_sections) 3633 CancelKind = CancelSections; 3634 else { 3635 assert(CancelRegion == OMPD_taskgroup); 3636 CancelKind = CancelTaskgroup; 3637 } 3638 return CancelKind; 3639 } 3640 3641 void CGOpenMPRuntime::emitCancellationPointCall( 3642 CodeGenFunction &CGF, SourceLocation Loc, 3643 OpenMPDirectiveKind CancelRegion) { 3644 if (!CGF.HaveInsertPoint()) 3645 return; 3646 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 3647 // global_tid, kmp_int32 cncl_kind); 3648 if (auto *OMPRegionInfo = 3649 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 3650 if (OMPRegionInfo->getDirectiveKind() == OMPD_single) 3651 return; 3652 if (OMPRegionInfo->hasCancel()) { 3653 llvm::Value *Args[] = { 3654 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3655 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 3656 // Ignore return result until untied tasks are supported. 
auto *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //  __kmpc_cancel_barrier();
      //   exit from construct;
      // }
      auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
      auto *ContBB = CGF.createBasicBlock(".cancel.continue");
      auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // __kmpc_cancel_barrier();
      emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      auto CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

/// \brief Emit a call to __kmpc_cancel and, if it reports a non-zero result,
/// a barrier followed by a branch out of the enclosing construct.
///
/// Nothing is emitted when \p CGF has no insertion point, when the current
/// captured-statement info is not an OpenMP region, or when that region is a
/// 'single' directive.
/// \param CGF Function to emit into.
/// \param Loc Source location passed on to the runtime calls.
/// \param IfCond Optional condition; when present the cancel sequence is only
/// emitted on its 'then' path and the 'else' path is empty.
/// \param CancelRegion Kind of region being cancelled; translated with
/// getCancellationKind().
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getDirectiveKind() == OMPD_single)
      return;
    // Emits the cancel call plus the conditional exit. Kept as a lambda so it
    // can be used both directly and as the 'then' part of an if clause.
    auto &&ThenGen = [this, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The result is not ignored: it is tested right below, and a non-zero
      // value leads to the barrier and the exit from the construct.
      auto *Result =
          CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //  __kmpc_cancel_barrier();
      //   exit from construct;
      // }
      auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
      auto *ContBB = CGF.createBasicBlock(".cancel.continue");
      auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // __kmpc_cancel_barrier();
      emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      // The destination is looked up by the kind of the enclosing region, not
      // by CancelRegion.
      auto CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond)
      emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &) {});
    else
      ThenGen(CGF);
  }
}

/// \brief Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line and column numbers
/// associated with the relevant entry source location.
3725 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 3726 unsigned &DeviceID, unsigned &FileID, 3727 unsigned &LineNum, unsigned &ColumnNum) { 3728 3729 auto &SM = C.getSourceManager(); 3730 3731 // The loc should be always valid and have a file ID (the user cannot use 3732 // #pragma directives in macros) 3733 3734 assert(Loc.isValid() && "Source location is expected to be always valid."); 3735 assert(Loc.isFileID() && "Source location is expected to refer to a file."); 3736 3737 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 3738 assert(PLoc.isValid() && "Source location is expected to be always valid."); 3739 3740 llvm::sys::fs::UniqueID ID; 3741 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 3742 llvm_unreachable("Source file with target region no longer exists!"); 3743 3744 DeviceID = ID.getDevice(); 3745 FileID = ID.getFile(); 3746 LineNum = PLoc.getLine(); 3747 ColumnNum = PLoc.getColumn(); 3748 return; 3749 } 3750 3751 void CGOpenMPRuntime::emitTargetOutlinedFunction( 3752 const OMPExecutableDirective &D, StringRef ParentName, 3753 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 3754 bool IsOffloadEntry) { 3755 3756 assert(!ParentName.empty() && "Invalid target region parent name!"); 3757 3758 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 3759 3760 // Emit target region as a standalone region. 3761 auto &&CodeGen = [&CS](CodeGenFunction &CGF) { 3762 CGF.EmitStmt(CS.getCapturedStmt()); 3763 }; 3764 3765 // Create a unique name for the proxy/entry function that using the source 3766 // location information of the current target region. The name will be 3767 // something like: 3768 // 3769 // .omp_offloading.DD_FFFF.PP.lBB.cCC 3770 // 3771 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 3772 // mangled name of the function that encloses the target region, BB is the 3773 // line number of the target region, and CC is the column number of the target 3774 // region. 
3775 3776 unsigned DeviceID; 3777 unsigned FileID; 3778 unsigned Line; 3779 unsigned Column; 3780 getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID, 3781 Line, Column); 3782 SmallString<64> EntryFnName; 3783 { 3784 llvm::raw_svector_ostream OS(EntryFnName); 3785 OS << ".omp_offloading" << llvm::format(".%x", DeviceID) 3786 << llvm::format(".%x.", FileID) << ParentName << ".l" << Line << ".c" 3787 << Column; 3788 } 3789 3790 CodeGenFunction CGF(CGM, true); 3791 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 3792 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 3793 3794 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); 3795 3796 // If this target outline function is not an offload entry, we don't need to 3797 // register it. 3798 if (!IsOffloadEntry) 3799 return; 3800 3801 // The target region ID is used by the runtime library to identify the current 3802 // target region, so it only has to be unique and not necessarily point to 3803 // anything. It could be the pointer to the outlined function that implements 3804 // the target region, but we aren't using that so that the compiler doesn't 3805 // need to keep that, and could therefore inline the host function if proven 3806 // worthwhile during optimization. In the other hand, if emitting code for the 3807 // device, the ID has to be the function address so that it can retrieved from 3808 // the offloading entry and launched by the runtime library. We also mark the 3809 // outlined function to have external linkage in case we are emitting code for 3810 // the device, because these functions will be entry points to the device. 
3811 3812 if (CGM.getLangOpts().OpenMPIsDevice) { 3813 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 3814 OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage); 3815 } else 3816 OutlinedFnID = new llvm::GlobalVariable( 3817 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 3818 llvm::GlobalValue::PrivateLinkage, 3819 llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id"); 3820 3821 // Register the information for the entry associated with this target region. 3822 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 3823 DeviceID, FileID, ParentName, Line, Column, OutlinedFn, OutlinedFnID); 3824 return; 3825 } 3826 3827 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, 3828 const OMPExecutableDirective &D, 3829 llvm::Value *OutlinedFn, 3830 llvm::Value *OutlinedFnID, 3831 const Expr *IfCond, const Expr *Device, 3832 ArrayRef<llvm::Value *> CapturedVars) { 3833 if (!CGF.HaveInsertPoint()) 3834 return; 3835 /// \brief Values for bit flags used to specify the mapping type for 3836 /// offloading. 3837 enum OpenMPOffloadMappingFlags { 3838 /// \brief Allocate memory on the device and move data from host to device. 3839 OMP_MAP_TO = 0x01, 3840 /// \brief Allocate memory on the device and move data from device to host. 3841 OMP_MAP_FROM = 0x02, 3842 /// \brief The element passed to the device is a pointer. 3843 OMP_MAP_PTR = 0x20, 3844 /// \brief Pass the element to the device by value. 3845 OMP_MAP_BYCOPY = 0x80, 3846 }; 3847 3848 enum OpenMPOffloadingReservedDeviceIDs { 3849 /// \brief Device ID if the device was not defined, runtime should get it 3850 /// from environment variables in the spec. 3851 OMP_DEVICEID_UNDEF = -1, 3852 }; 3853 3854 assert(OutlinedFn && "Invalid outlined function!"); 3855 3856 auto &Ctx = CGF.getContext(); 3857 3858 // Fill up the arrays with the all the captured variables. 
3859 SmallVector<llvm::Value *, 16> BasePointers; 3860 SmallVector<llvm::Value *, 16> Pointers; 3861 SmallVector<llvm::Value *, 16> Sizes; 3862 SmallVector<unsigned, 16> MapTypes; 3863 3864 bool hasVLACaptures = false; 3865 3866 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 3867 auto RI = CS.getCapturedRecordDecl()->field_begin(); 3868 // auto II = CS.capture_init_begin(); 3869 auto CV = CapturedVars.begin(); 3870 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 3871 CE = CS.capture_end(); 3872 CI != CE; ++CI, ++RI, ++CV) { 3873 StringRef Name; 3874 QualType Ty; 3875 llvm::Value *BasePointer; 3876 llvm::Value *Pointer; 3877 llvm::Value *Size; 3878 unsigned MapType; 3879 3880 // VLA sizes are passed to the outlined region by copy. 3881 if (CI->capturesVariableArrayType()) { 3882 BasePointer = Pointer = *CV; 3883 Size = getTypeSize(CGF, RI->getType()); 3884 // Copy to the device as an argument. No need to retrieve it. 3885 MapType = OMP_MAP_BYCOPY; 3886 hasVLACaptures = true; 3887 } else if (CI->capturesThis()) { 3888 BasePointer = Pointer = *CV; 3889 const PointerType *PtrTy = cast<PointerType>(RI->getType().getTypePtr()); 3890 Size = getTypeSize(CGF, PtrTy->getPointeeType()); 3891 // Default map type. 3892 MapType = OMP_MAP_TO | OMP_MAP_FROM; 3893 } else if (CI->capturesVariableByCopy()) { 3894 MapType = OMP_MAP_BYCOPY; 3895 if (!RI->getType()->isAnyPointerType()) { 3896 // If the field is not a pointer, we need to save the actual value and 3897 // load it as a void pointer. 
3898 auto DstAddr = CGF.CreateMemTemp( 3899 Ctx.getUIntPtrType(), 3900 Twine(CI->getCapturedVar()->getName()) + ".casted"); 3901 LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType()); 3902 3903 auto *SrcAddrVal = CGF.EmitScalarConversion( 3904 DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()), 3905 Ctx.getPointerType(RI->getType()), SourceLocation()); 3906 LValue SrcLV = 3907 CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, RI->getType()); 3908 3909 // Store the value using the source type pointer. 3910 CGF.EmitStoreThroughLValue(RValue::get(*CV), SrcLV); 3911 3912 // Load the value using the destination type pointer. 3913 BasePointer = Pointer = 3914 CGF.EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal(); 3915 } else { 3916 MapType |= OMP_MAP_PTR; 3917 BasePointer = Pointer = *CV; 3918 } 3919 Size = getTypeSize(CGF, RI->getType()); 3920 } else { 3921 assert(CI->capturesVariable() && "Expected captured reference."); 3922 BasePointer = Pointer = *CV; 3923 3924 const ReferenceType *PtrTy = 3925 cast<ReferenceType>(RI->getType().getTypePtr()); 3926 QualType ElementType = PtrTy->getPointeeType(); 3927 Size = getTypeSize(CGF, ElementType); 3928 // The default map type for a scalar/complex type is 'to' because by 3929 // default the value doesn't have to be retrieved. For an aggregate type, 3930 // the default is 'tofrom'. 3931 MapType = ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM) 3932 : OMP_MAP_TO; 3933 if (ElementType->isAnyPointerType()) 3934 MapType |= OMP_MAP_PTR; 3935 } 3936 3937 BasePointers.push_back(BasePointer); 3938 Pointers.push_back(Pointer); 3939 Sizes.push_back(Size); 3940 MapTypes.push_back(MapType); 3941 } 3942 3943 // Keep track on whether the host function has to be executed. 
3944 auto OffloadErrorQType = 3945 Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true); 3946 auto OffloadError = CGF.MakeAddrLValue( 3947 CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"), 3948 OffloadErrorQType); 3949 CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), 3950 OffloadError); 3951 3952 // Fill up the pointer arrays and transfer execution to the device. 3953 auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, 3954 hasVLACaptures, Device, OutlinedFnID, OffloadError, 3955 OffloadErrorQType](CodeGenFunction &CGF) { 3956 unsigned PointerNumVal = BasePointers.size(); 3957 llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal); 3958 llvm::Value *BasePointersArray; 3959 llvm::Value *PointersArray; 3960 llvm::Value *SizesArray; 3961 llvm::Value *MapTypesArray; 3962 3963 if (PointerNumVal) { 3964 llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true); 3965 QualType PointerArrayType = Ctx.getConstantArrayType( 3966 Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, 3967 /*IndexTypeQuals=*/0); 3968 3969 BasePointersArray = 3970 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 3971 PointersArray = 3972 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 3973 3974 // If we don't have any VLA types, we can use a constant array for the map 3975 // sizes, otherwise we need to fill up the arrays as we do for the 3976 // pointers. 3977 if (hasVLACaptures) { 3978 QualType SizeArrayType = Ctx.getConstantArrayType( 3979 Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, 3980 /*IndexTypeQuals=*/0); 3981 SizesArray = 3982 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 3983 } else { 3984 // We expect all the sizes to be constant, so we collect them to create 3985 // a constant array. 
3986 SmallVector<llvm::Constant *, 16> ConstSizes; 3987 for (auto S : Sizes) 3988 ConstSizes.push_back(cast<llvm::Constant>(S)); 3989 3990 auto *SizesArrayInit = llvm::ConstantArray::get( 3991 llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); 3992 auto *SizesArrayGbl = new llvm::GlobalVariable( 3993 CGM.getModule(), SizesArrayInit->getType(), 3994 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 3995 SizesArrayInit, ".offload_sizes"); 3996 SizesArrayGbl->setUnnamedAddr(true); 3997 SizesArray = SizesArrayGbl; 3998 } 3999 4000 // The map types are always constant so we don't need to generate code to 4001 // fill arrays. Instead, we create an array constant. 4002 llvm::Constant *MapTypesArrayInit = 4003 llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); 4004 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 4005 CGM.getModule(), MapTypesArrayInit->getType(), 4006 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 4007 MapTypesArrayInit, ".offload_maptypes"); 4008 MapTypesArrayGbl->setUnnamedAddr(true); 4009 MapTypesArray = MapTypesArrayGbl; 4010 4011 for (unsigned i = 0; i < PointerNumVal; ++i) { 4012 4013 llvm::Value *BPVal = BasePointers[i]; 4014 if (BPVal->getType()->isPointerTy()) 4015 BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy); 4016 else { 4017 assert(BPVal->getType()->isIntegerTy() && 4018 "If not a pointer, the value type must be an integer."); 4019 BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy); 4020 } 4021 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 4022 llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), 4023 BasePointersArray, 0, i); 4024 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 4025 CGF.Builder.CreateStore(BPVal, BPAddr); 4026 4027 llvm::Value *PVal = Pointers[i]; 4028 if (PVal->getType()->isPointerTy()) 4029 PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy); 4030 else { 4031 assert(PVal->getType()->isIntegerTy() && 4032 "If not a pointer, the value type 
must be an integer."); 4033 PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy); 4034 } 4035 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 4036 llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 4037 0, i); 4038 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 4039 CGF.Builder.CreateStore(PVal, PAddr); 4040 4041 if (hasVLACaptures) { 4042 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 4043 llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, 4044 /*Idx0=*/0, 4045 /*Idx1=*/i); 4046 Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); 4047 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast( 4048 Sizes[i], CGM.SizeTy, /*isSigned=*/true), 4049 SAddr); 4050 } 4051 } 4052 4053 BasePointersArray = CGF.Builder.CreateConstInBoundsGEP2_32( 4054 llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray, 4055 /*Idx0=*/0, /*Idx1=*/0); 4056 PointersArray = CGF.Builder.CreateConstInBoundsGEP2_32( 4057 llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 4058 /*Idx0=*/0, 4059 /*Idx1=*/0); 4060 SizesArray = CGF.Builder.CreateConstInBoundsGEP2_32( 4061 llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, 4062 /*Idx0=*/0, /*Idx1=*/0); 4063 MapTypesArray = CGF.Builder.CreateConstInBoundsGEP2_32( 4064 llvm::ArrayType::get(CGM.Int32Ty, PointerNumVal), MapTypesArray, 4065 /*Idx0=*/0, 4066 /*Idx1=*/0); 4067 4068 } else { 4069 BasePointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 4070 PointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 4071 SizesArray = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); 4072 MapTypesArray = 4073 llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()); 4074 } 4075 4076 // On top of the arrays that were filled up, the target offloading call 4077 // takes as arguments the device id as well as the host pointer. 
The host 4078 // pointer is used by the runtime library to identify the current target 4079 // region, so it only has to be unique and not necessarily point to 4080 // anything. It could be the pointer to the outlined function that 4081 // implements the target region, but we aren't using that so that the 4082 // compiler doesn't need to keep that, and could therefore inline the host 4083 // function if proven worthwhile during optimization. 4084 4085 // From this point on, we need to have an ID of the target region defined. 4086 assert(OutlinedFnID && "Invalid outlined function ID!"); 4087 4088 // Emit device ID if any. 4089 llvm::Value *DeviceID; 4090 if (Device) 4091 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 4092 CGM.Int32Ty, /*isSigned=*/true); 4093 else 4094 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 4095 4096 llvm::Value *OffloadingArgs[] = { 4097 DeviceID, OutlinedFnID, PointerNum, BasePointersArray, 4098 PointersArray, SizesArray, MapTypesArray}; 4099 auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target), 4100 OffloadingArgs); 4101 4102 CGF.EmitStoreOfScalar(Return, OffloadError); 4103 }; 4104 4105 // Notify that the host version must be executed. 4106 auto &&ElseGen = [this, OffloadError, 4107 OffloadErrorQType](CodeGenFunction &CGF) { 4108 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u), 4109 OffloadError); 4110 }; 4111 4112 // If we have a target function ID it means that we need to support 4113 // offloading, otherwise, just execute on the host. We need to execute on host 4114 // regardless of the conditional in the if clause if, e.g., the user do not 4115 // specify target triples. 
4116 if (OutlinedFnID) { 4117 if (IfCond) { 4118 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 4119 } else { 4120 CodeGenFunction::RunCleanupsScope Scope(CGF); 4121 ThenGen(CGF); 4122 } 4123 } else { 4124 CodeGenFunction::RunCleanupsScope Scope(CGF); 4125 ElseGen(CGF); 4126 } 4127 4128 // Check the error code and execute the host version if required. 4129 auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed"); 4130 auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont"); 4131 auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation()); 4132 auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal); 4133 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 4134 4135 CGF.EmitBlock(OffloadFailedBlock); 4136 CGF.Builder.CreateCall(OutlinedFn, BasePointers); 4137 CGF.EmitBranch(OffloadContBlock); 4138 4139 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 4140 return; 4141 } 4142 4143 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 4144 StringRef ParentName) { 4145 if (!S) 4146 return; 4147 4148 // If we find a OMP target directive, codegen the outline function and 4149 // register the result. 4150 // FIXME: Add other directives with target when they become supported. 4151 bool isTargetDirective = isa<OMPTargetDirective>(S); 4152 4153 if (isTargetDirective) { 4154 auto *E = cast<OMPExecutableDirective>(S); 4155 unsigned DeviceID; 4156 unsigned FileID; 4157 unsigned Line; 4158 unsigned Column; 4159 getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID, 4160 FileID, Line, Column); 4161 4162 // Is this a target region that should not be emitted as an entry point? If 4163 // so just signal we are done with this target region. 
4164 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo( 4165 DeviceID, FileID, ParentName, Line, Column)) 4166 return; 4167 4168 llvm::Function *Fn; 4169 llvm::Constant *Addr; 4170 emitTargetOutlinedFunction(*E, ParentName, Fn, Addr, 4171 /*isOffloadEntry=*/true); 4172 assert(Fn && Addr && "Target region emission failed."); 4173 return; 4174 } 4175 4176 if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) { 4177 if (!E->getAssociatedStmt()) 4178 return; 4179 4180 scanForTargetRegionsFunctions( 4181 cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(), 4182 ParentName); 4183 return; 4184 } 4185 4186 // If this is a lambda function, look into its body. 4187 if (auto *L = dyn_cast<LambdaExpr>(S)) 4188 S = L->getBody(); 4189 4190 // Keep looking for target regions recursively. 4191 for (auto *II : S->children()) 4192 scanForTargetRegionsFunctions(II, ParentName); 4193 4194 return; 4195 } 4196 4197 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 4198 auto &FD = *cast<FunctionDecl>(GD.getDecl()); 4199 4200 // If emitting code for the host, we do not process FD here. Instead we do 4201 // the normal code generation. 4202 if (!CGM.getLangOpts().OpenMPIsDevice) 4203 return false; 4204 4205 // Try to detect target regions in the function. 4206 scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD)); 4207 4208 // We should not emit any function othen that the ones created during the 4209 // scanning. Therefore, we signal that this function is completely dealt 4210 // with. 4211 return true; 4212 } 4213 4214 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 4215 if (!CGM.getLangOpts().OpenMPIsDevice) 4216 return false; 4217 4218 // Check if there are Ctors/Dtors in this declaration and look for target 4219 // regions in it. We use the complete variant to produce the kernel name 4220 // mangling. 
4221 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 4222 if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 4223 for (auto *Ctor : RD->ctors()) { 4224 StringRef ParentName = 4225 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 4226 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 4227 } 4228 auto *Dtor = RD->getDestructor(); 4229 if (Dtor) { 4230 StringRef ParentName = 4231 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 4232 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 4233 } 4234 } 4235 4236 // If we are in target mode we do not emit any global (declare target is not 4237 // implemented yet). Therefore we signal that GD was processed in this case. 4238 return true; 4239 } 4240 4241 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 4242 auto *VD = GD.getDecl(); 4243 if (isa<FunctionDecl>(VD)) 4244 return emitTargetFunctions(GD); 4245 4246 return emitTargetGlobalVariable(GD); 4247 } 4248 4249 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { 4250 // If we have offloading in the current module, we need to emit the entries 4251 // now and register the offloading descriptor. 4252 createOffloadEntriesAndInfoMetadata(); 4253 4254 // Create and register the offloading binary descriptors. This is the main 4255 // entity that captures all the information about offloading in the current 4256 // compilation unit. 4257 return createOffloadingBinaryDescriptorRegistration(); 4258 } 4259