1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This provides a class for OpenMP runtime code generation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGOpenMPRuntime.h" 15 #include "CodeGenFunction.h" 16 #include "CGCleanup.h" 17 #include "clang/AST/Decl.h" 18 #include "clang/AST/StmtOpenMP.h" 19 #include "llvm/ADT/ArrayRef.h" 20 #include "llvm/IR/CallSite.h" 21 #include "llvm/IR/DerivedTypes.h" 22 #include "llvm/IR/GlobalValue.h" 23 #include "llvm/IR/Value.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include <cassert> 26 27 using namespace clang; 28 using namespace CodeGen; 29 30 namespace { 31 /// \brief Base class for handling code generation inside OpenMP regions. 32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 33 public: 34 /// \brief Kinds of OpenMP regions used in codegen. 35 enum CGOpenMPRegionKind { 36 /// \brief Region with outlined function for standalone 'parallel' 37 /// directive. 38 ParallelOutlinedRegion, 39 /// \brief Region with outlined function for standalone 'task' directive. 40 TaskOutlinedRegion, 41 /// \brief Region for constructs that do not require function outlining, 42 /// like 'for', 'sections', 'atomic' etc. directives. 43 InlinedRegion, 44 /// \brief Region with outlined function for standalone 'target' directive. 
45 TargetRegion, 46 }; 47 48 CGOpenMPRegionInfo(const CapturedStmt &CS, 49 const CGOpenMPRegionKind RegionKind, 50 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 51 bool HasCancel) 52 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 53 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 54 55 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 56 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 57 bool HasCancel) 58 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 59 Kind(Kind), HasCancel(HasCancel) {} 60 61 /// \brief Get a variable or parameter for storing global thread id 62 /// inside OpenMP construct. 63 virtual const VarDecl *getThreadIDVariable() const = 0; 64 65 /// \brief Emit the captured statement body. 66 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 67 68 /// \brief Get an LValue for the current ThreadID variable. 69 /// \return LValue for thread id variable. This LValue always has type int32*. 70 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 71 72 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 73 74 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 75 76 bool hasCancel() const { return HasCancel; } 77 78 static bool classof(const CGCapturedStmtInfo *Info) { 79 return Info->getKind() == CR_OpenMP; 80 } 81 82 protected: 83 CGOpenMPRegionKind RegionKind; 84 const RegionCodeGenTy &CodeGen; 85 OpenMPDirectiveKind Kind; 86 bool HasCancel; 87 }; 88 89 /// \brief API for captured statement code generation in OpenMP constructs. 
90 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo { 91 public: 92 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 93 const RegionCodeGenTy &CodeGen, 94 OpenMPDirectiveKind Kind, bool HasCancel) 95 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 96 HasCancel), 97 ThreadIDVar(ThreadIDVar) { 98 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 99 } 100 /// \brief Get a variable or parameter for storing global thread id 101 /// inside OpenMP construct. 102 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 103 104 /// \brief Get the name of the capture helper. 105 StringRef getHelperName() const override { return ".omp_outlined."; } 106 107 static bool classof(const CGCapturedStmtInfo *Info) { 108 return CGOpenMPRegionInfo::classof(Info) && 109 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 110 ParallelOutlinedRegion; 111 } 112 113 private: 114 /// \brief A variable or parameter storing global thread id for OpenMP 115 /// constructs. 116 const VarDecl *ThreadIDVar; 117 }; 118 119 /// \brief API for captured statement code generation in OpenMP constructs. 120 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo { 121 public: 122 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 123 const VarDecl *ThreadIDVar, 124 const RegionCodeGenTy &CodeGen, 125 OpenMPDirectiveKind Kind, bool HasCancel) 126 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 127 ThreadIDVar(ThreadIDVar) { 128 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 129 } 130 /// \brief Get a variable or parameter for storing global thread id 131 /// inside OpenMP construct. 132 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 133 134 /// \brief Get an LValue for the current ThreadID variable. 135 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 136 137 /// \brief Get the name of the capture helper. 
138 StringRef getHelperName() const override { return ".omp_outlined."; } 139 140 static bool classof(const CGCapturedStmtInfo *Info) { 141 return CGOpenMPRegionInfo::classof(Info) && 142 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 143 TaskOutlinedRegion; 144 } 145 146 private: 147 /// \brief A variable or parameter storing global thread id for OpenMP 148 /// constructs. 149 const VarDecl *ThreadIDVar; 150 }; 151 152 /// \brief API for inlined captured statement code generation in OpenMP 153 /// constructs. 154 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 155 public: 156 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 157 const RegionCodeGenTy &CodeGen, 158 OpenMPDirectiveKind Kind, bool HasCancel) 159 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 160 OldCSI(OldCSI), 161 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 162 // \brief Retrieve the value of the context parameter. 163 llvm::Value *getContextValue() const override { 164 if (OuterRegionInfo) 165 return OuterRegionInfo->getContextValue(); 166 llvm_unreachable("No context value for inlined OpenMP region"); 167 } 168 void setContextValue(llvm::Value *V) override { 169 if (OuterRegionInfo) { 170 OuterRegionInfo->setContextValue(V); 171 return; 172 } 173 llvm_unreachable("No context value for inlined OpenMP region"); 174 } 175 /// \brief Lookup the captured field decl for a variable. 176 const FieldDecl *lookup(const VarDecl *VD) const override { 177 if (OuterRegionInfo) 178 return OuterRegionInfo->lookup(VD); 179 // If there is no outer outlined region,no need to lookup in a list of 180 // captured variables, we can use the original one. 181 return nullptr; 182 } 183 FieldDecl *getThisFieldDecl() const override { 184 if (OuterRegionInfo) 185 return OuterRegionInfo->getThisFieldDecl(); 186 return nullptr; 187 } 188 /// \brief Get a variable or parameter for storing global thread id 189 /// inside OpenMP construct. 
190 const VarDecl *getThreadIDVariable() const override { 191 if (OuterRegionInfo) 192 return OuterRegionInfo->getThreadIDVariable(); 193 return nullptr; 194 } 195 196 /// \brief Get the name of the capture helper. 197 StringRef getHelperName() const override { 198 if (auto *OuterRegionInfo = getOldCSI()) 199 return OuterRegionInfo->getHelperName(); 200 llvm_unreachable("No helper name for inlined OpenMP construct"); 201 } 202 203 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 204 205 static bool classof(const CGCapturedStmtInfo *Info) { 206 return CGOpenMPRegionInfo::classof(Info) && 207 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 208 } 209 210 private: 211 /// \brief CodeGen info about outer OpenMP region. 212 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 213 CGOpenMPRegionInfo *OuterRegionInfo; 214 }; 215 216 /// \brief API for captured statement code generation in OpenMP target 217 /// constructs. For this captures, implicit parameters are used instead of the 218 /// captured fields. 219 class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo { 220 public: 221 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 222 const RegionCodeGenTy &CodeGen) 223 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 224 /*HasCancel = */ false) {} 225 226 /// \brief This is unused for target regions because each starts executing 227 /// with a single thread. 228 const VarDecl *getThreadIDVariable() const override { return nullptr; } 229 230 /// \brief Get the name of the capture helper. 231 StringRef getHelperName() const override { return ".omp_offloading."; } 232 233 static bool classof(const CGCapturedStmtInfo *Info) { 234 return CGOpenMPRegionInfo::classof(Info) && 235 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 236 } 237 }; 238 239 /// \brief RAII for emitting code of OpenMP constructs. 
240 class InlinedOpenMPRegionRAII { 241 CodeGenFunction &CGF; 242 243 public: 244 /// \brief Constructs region for combined constructs. 245 /// \param CodeGen Code generation sequence for combined directives. Includes 246 /// a list of functions used for code generation of implicitly inlined 247 /// regions. 248 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 249 OpenMPDirectiveKind Kind, bool HasCancel) 250 : CGF(CGF) { 251 // Start emission for the construct. 252 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 253 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 254 } 255 ~InlinedOpenMPRegionRAII() { 256 // Restore original CapturedStmtInfo only if we're done with code emission. 257 auto *OldCSI = 258 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 259 delete CGF.CapturedStmtInfo; 260 CGF.CapturedStmtInfo = OldCSI; 261 } 262 }; 263 264 } // anonymous namespace 265 266 static LValue emitLoadOfPointerLValue(CodeGenFunction &CGF, Address PtrAddr, 267 QualType Ty) { 268 AlignmentSource Source; 269 CharUnits Align = CGF.getNaturalPointeeTypeAlignment(Ty, &Source); 270 return CGF.MakeAddrLValue(Address(CGF.Builder.CreateLoad(PtrAddr), Align), 271 Ty->getPointeeType(), Source); 272 } 273 274 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 275 return emitLoadOfPointerLValue(CGF, 276 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 277 getThreadIDVariable()->getType()); 278 } 279 280 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 281 // 1.2.2 OpenMP Language Terminology 282 // Structured block - An executable statement with a single entry at the 283 // top and a single exit at the bottom. 284 // The point of exit cannot be a branch out of the structured block. 285 // longjmp() and throw() must not violate the entry/exit criteria. 
286 CGF.EHStack.pushTerminate(); 287 { 288 CodeGenFunction::RunCleanupsScope Scope(CGF); 289 CodeGen(CGF); 290 } 291 CGF.EHStack.popTerminate(); 292 } 293 294 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 295 CodeGenFunction &CGF) { 296 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 297 getThreadIDVariable()->getType(), 298 AlignmentSource::Decl); 299 } 300 301 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) 302 : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) { 303 IdentTy = llvm::StructType::create( 304 "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, 305 CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, 306 CGM.Int8PtrTy /* psource */, nullptr); 307 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 308 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 309 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 310 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 311 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 312 } 313 314 void CGOpenMPRuntime::clear() { 315 InternalVars.clear(); 316 } 317 318 // Layout information for ident_t. 319 static CharUnits getIdentAlign(CodeGenModule &CGM) { 320 return CGM.getPointerAlign(); 321 } 322 static CharUnits getIdentSize(CodeGenModule &CGM) { 323 assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign())); 324 return CharUnits::fromQuantity(16) + CGM.getPointerSize(); 325 } 326 static CharUnits getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field) { 327 // All the fields except the last are i32, so this works beautifully. 
328 return unsigned(Field) * CharUnits::fromQuantity(4); 329 } 330 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr, 331 CGOpenMPRuntime::IdentFieldIndex Field, 332 const llvm::Twine &Name = "") { 333 auto Offset = getOffsetOfIdentField(Field); 334 return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name); 335 } 336 337 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( 338 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 339 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 340 assert(ThreadIDVar->getType()->isPointerType() && 341 "thread id variable must be of type kmp_int32 *"); 342 const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 343 CodeGenFunction CGF(CGM, true); 344 bool HasCancel = false; 345 if (auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 346 HasCancel = OPD->hasCancel(); 347 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 348 HasCancel = OPSD->hasCancel(); 349 else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 350 HasCancel = OPFD->hasCancel(); 351 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 352 HasCancel); 353 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 354 return CGF.GenerateOpenMPCapturedStmtFunction(*CS); 355 } 356 357 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( 358 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 359 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 360 assert(!ThreadIDVar->getType()->isPointerType() && 361 "thread id variable must be of type kmp_int32 for tasks"); 362 auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 363 CodeGenFunction CGF(CGM, true); 364 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 365 InnermostKind, 366 cast<OMPTaskDirective>(D).hasCancel()); 367 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 368 return CGF.GenerateCapturedStmtFunction(*CS); 369 } 370 371 Address 
CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { 372 CharUnits Align = getIdentAlign(CGM); 373 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); 374 if (!Entry) { 375 if (!DefaultOpenMPPSource) { 376 // Initialize default location for psource field of ident_t structure of 377 // all ident_t objects. Format is ";file;function;line;column;;". 378 // Taken from 379 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c 380 DefaultOpenMPPSource = 381 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 382 DefaultOpenMPPSource = 383 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 384 } 385 auto DefaultOpenMPLocation = new llvm::GlobalVariable( 386 CGM.getModule(), IdentTy, /*isConstant*/ true, 387 llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr); 388 DefaultOpenMPLocation->setUnnamedAddr(true); 389 DefaultOpenMPLocation->setAlignment(Align.getQuantity()); 390 391 llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true); 392 llvm::Constant *Values[] = {Zero, 393 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 394 Zero, Zero, DefaultOpenMPPSource}; 395 llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values); 396 DefaultOpenMPLocation->setInitializer(Init); 397 OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation; 398 } 399 return Address(Entry, Align); 400 } 401 402 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 403 SourceLocation Loc, 404 OpenMPLocationFlags Flags) { 405 // If no debug info is generated - return global default location. 
406 if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo || 407 Loc.isInvalid()) 408 return getOrCreateDefaultLocation(Flags).getPointer(); 409 410 assert(CGF.CurFn && "No function in current CodeGenFunction."); 411 412 Address LocValue = Address::invalid(); 413 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 414 if (I != OpenMPLocThreadIDMap.end()) 415 LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM)); 416 417 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 418 // GetOpenMPThreadID was called before this routine. 419 if (!LocValue.isValid()) { 420 // Generate "ident_t .kmpc_loc.addr;" 421 Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM), 422 ".kmpc_loc.addr"); 423 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 424 Elem.second.DebugLoc = AI.getPointer(); 425 LocValue = AI; 426 427 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 428 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 429 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 430 CGM.getSize(getIdentSize(CGF.CGM))); 431 } 432 433 // char **psource = &.kmpc_loc_<flags>.addr.psource; 434 Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource); 435 436 auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 437 if (OMPDebugLoc == nullptr) { 438 SmallString<128> Buffer2; 439 llvm::raw_svector_ostream OS2(Buffer2); 440 // Build debug location 441 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 442 OS2 << ";" << PLoc.getFilename() << ";"; 443 if (const FunctionDecl *FD = 444 dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) { 445 OS2 << FD->getQualifiedNameAsString(); 446 } 447 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 448 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 449 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 450 } 451 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 452 
CGF.Builder.CreateStore(OMPDebugLoc, PSource); 453 454 // Our callers always pass this to a runtime function, so for 455 // convenience, go ahead and return a naked pointer. 456 return LocValue.getPointer(); 457 } 458 459 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 460 SourceLocation Loc) { 461 assert(CGF.CurFn && "No function in current CodeGenFunction."); 462 463 llvm::Value *ThreadID = nullptr; 464 // Check whether we've already cached a load of the thread id in this 465 // function. 466 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 467 if (I != OpenMPLocThreadIDMap.end()) { 468 ThreadID = I->second.ThreadID; 469 if (ThreadID != nullptr) 470 return ThreadID; 471 } 472 if (auto OMPRegionInfo = 473 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 474 if (OMPRegionInfo->getThreadIDVariable()) { 475 // Check if this an outlined function with thread id passed as argument. 476 auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 477 ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); 478 // If value loaded in entry block, cache it and use it everywhere in 479 // function. 480 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 481 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 482 Elem.second.ThreadID = ThreadID; 483 } 484 return ThreadID; 485 } 486 } 487 488 // This is not an outlined function region - need to call __kmpc_int32 489 // kmpc_global_thread_num(ident_t *loc). 490 // Generate thread id value and cache this value for use across the 491 // function. 
492 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 493 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 494 ThreadID = 495 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 496 emitUpdateLocation(CGF, Loc)); 497 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 498 Elem.second.ThreadID = ThreadID; 499 return ThreadID; 500 } 501 502 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 503 assert(CGF.CurFn && "No function in current CodeGenFunction."); 504 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 505 OpenMPLocThreadIDMap.erase(CGF.CurFn); 506 } 507 508 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 509 return llvm::PointerType::getUnqual(IdentTy); 510 } 511 512 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 513 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 514 } 515 516 llvm::Constant * 517 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { 518 llvm::Constant *RTLFn = nullptr; 519 switch (Function) { 520 case OMPRTL__kmpc_fork_call: { 521 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 522 // microtask, ...); 523 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 524 getKmpc_MicroPointerTy()}; 525 llvm::FunctionType *FnTy = 526 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 527 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 528 break; 529 } 530 case OMPRTL__kmpc_global_thread_num: { 531 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 532 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 533 llvm::FunctionType *FnTy = 534 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 535 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 536 break; 537 } 538 case OMPRTL__kmpc_threadprivate_cached: { 539 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 540 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 541 llvm::Type *TypeParams[] = 
{getIdentTyPointerTy(), CGM.Int32Ty, 542 CGM.VoidPtrTy, CGM.SizeTy, 543 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 544 llvm::FunctionType *FnTy = 545 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 546 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 547 break; 548 } 549 case OMPRTL__kmpc_critical: { 550 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 551 // kmp_critical_name *crit); 552 llvm::Type *TypeParams[] = { 553 getIdentTyPointerTy(), CGM.Int32Ty, 554 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 555 llvm::FunctionType *FnTy = 556 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 557 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 558 break; 559 } 560 case OMPRTL__kmpc_threadprivate_register: { 561 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 562 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 563 // typedef void *(*kmpc_ctor)(void *); 564 auto KmpcCtorTy = 565 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 566 /*isVarArg*/ false)->getPointerTo(); 567 // typedef void *(*kmpc_cctor)(void *, void *); 568 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 569 auto KmpcCopyCtorTy = 570 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 571 /*isVarArg*/ false)->getPointerTo(); 572 // typedef void (*kmpc_dtor)(void *); 573 auto KmpcDtorTy = 574 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 575 ->getPointerTo(); 576 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 577 KmpcCopyCtorTy, KmpcDtorTy}; 578 auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 579 /*isVarArg*/ false); 580 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 581 break; 582 } 583 case OMPRTL__kmpc_end_critical: { 584 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 585 // kmp_critical_name *crit); 586 llvm::Type *TypeParams[] = { 
587 getIdentTyPointerTy(), CGM.Int32Ty, 588 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 589 llvm::FunctionType *FnTy = 590 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 591 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 592 break; 593 } 594 case OMPRTL__kmpc_cancel_barrier: { 595 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 596 // global_tid); 597 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 598 llvm::FunctionType *FnTy = 599 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 600 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 601 break; 602 } 603 case OMPRTL__kmpc_barrier: { 604 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 605 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 606 llvm::FunctionType *FnTy = 607 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 608 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 609 break; 610 } 611 case OMPRTL__kmpc_for_static_fini: { 612 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 613 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 614 llvm::FunctionType *FnTy = 615 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 616 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 617 break; 618 } 619 case OMPRTL__kmpc_push_num_threads: { 620 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 621 // kmp_int32 num_threads) 622 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 623 CGM.Int32Ty}; 624 llvm::FunctionType *FnTy = 625 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 626 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 627 break; 628 } 629 case OMPRTL__kmpc_serialized_parallel: { 630 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 631 // global_tid); 632 
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 633 llvm::FunctionType *FnTy = 634 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 635 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 636 break; 637 } 638 case OMPRTL__kmpc_end_serialized_parallel: { 639 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 640 // global_tid); 641 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 642 llvm::FunctionType *FnTy = 643 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 644 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 645 break; 646 } 647 case OMPRTL__kmpc_flush: { 648 // Build void __kmpc_flush(ident_t *loc); 649 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 650 llvm::FunctionType *FnTy = 651 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 652 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 653 break; 654 } 655 case OMPRTL__kmpc_master: { 656 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 657 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 658 llvm::FunctionType *FnTy = 659 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 660 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 661 break; 662 } 663 case OMPRTL__kmpc_end_master: { 664 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 665 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 666 llvm::FunctionType *FnTy = 667 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 668 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 669 break; 670 } 671 case OMPRTL__kmpc_omp_taskyield: { 672 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 673 // int end_part); 674 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 675 llvm::FunctionType *FnTy = 676 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 677 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 678 break; 679 } 680 case OMPRTL__kmpc_single: { 681 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 682 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 683 llvm::FunctionType *FnTy = 684 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 685 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 686 break; 687 } 688 case OMPRTL__kmpc_end_single: { 689 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 690 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 691 llvm::FunctionType *FnTy = 692 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 693 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 694 break; 695 } 696 case OMPRTL__kmpc_omp_task_alloc: { 697 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 698 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 699 // kmp_routine_entry_t *task_entry); 700 assert(KmpRoutineEntryPtrTy != nullptr && 701 "Type kmp_routine_entry_t must be created."); 702 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 703 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 704 // Return void * and then cast to particular kmp_task_t type. 
705 llvm::FunctionType *FnTy = 706 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 707 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 708 break; 709 } 710 case OMPRTL__kmpc_omp_task: { 711 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 712 // *new_task); 713 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 714 CGM.VoidPtrTy}; 715 llvm::FunctionType *FnTy = 716 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 717 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 718 break; 719 } 720 case OMPRTL__kmpc_copyprivate: { 721 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 722 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 723 // kmp_int32 didit); 724 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 725 auto *CpyFnTy = 726 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 727 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 728 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 729 CGM.Int32Ty}; 730 llvm::FunctionType *FnTy = 731 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 732 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 733 break; 734 } 735 case OMPRTL__kmpc_reduce: { 736 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 737 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 738 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 739 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 740 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 741 /*isVarArg=*/false); 742 llvm::Type *TypeParams[] = { 743 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 744 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 745 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 746 llvm::FunctionType *FnTy = 747 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 748 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 749 break; 750 } 751 case OMPRTL__kmpc_reduce_nowait: { 752 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 753 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 754 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 755 // *lck); 756 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 757 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 758 /*isVarArg=*/false); 759 llvm::Type *TypeParams[] = { 760 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 761 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 762 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 763 llvm::FunctionType *FnTy = 764 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 765 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 766 break; 767 } 768 case OMPRTL__kmpc_end_reduce: { 769 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 770 // kmp_critical_name *lck); 771 llvm::Type *TypeParams[] = { 772 getIdentTyPointerTy(), CGM.Int32Ty, 773 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 774 llvm::FunctionType *FnTy = 775 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 776 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 777 break; 778 } 779 case OMPRTL__kmpc_end_reduce_nowait: { 780 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 781 // kmp_critical_name *lck); 782 llvm::Type *TypeParams[] = { 783 getIdentTyPointerTy(), CGM.Int32Ty, 784 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 785 llvm::FunctionType *FnTy = 786 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 787 RTLFn = 788 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 789 break; 790 } 791 case OMPRTL__kmpc_omp_task_begin_if0: { 792 
// Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 793 // *new_task); 794 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 795 CGM.VoidPtrTy}; 796 llvm::FunctionType *FnTy = 797 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 798 RTLFn = 799 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 800 break; 801 } 802 case OMPRTL__kmpc_omp_task_complete_if0: { 803 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 804 // *new_task); 805 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 806 CGM.VoidPtrTy}; 807 llvm::FunctionType *FnTy = 808 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 809 RTLFn = CGM.CreateRuntimeFunction(FnTy, 810 /*Name=*/"__kmpc_omp_task_complete_if0"); 811 break; 812 } 813 case OMPRTL__kmpc_ordered: { 814 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 815 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 816 llvm::FunctionType *FnTy = 817 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 818 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 819 break; 820 } 821 case OMPRTL__kmpc_end_ordered: { 822 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 823 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 824 llvm::FunctionType *FnTy = 825 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 826 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 827 break; 828 } 829 case OMPRTL__kmpc_omp_taskwait: { 830 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 831 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 832 llvm::FunctionType *FnTy = 833 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 834 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 835 break; 836 } 837 case OMPRTL__kmpc_taskgroup: { 838 // Build void __kmpc_taskgroup(ident_t 
*loc, kmp_int32 global_tid); 839 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 840 llvm::FunctionType *FnTy = 841 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 842 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 843 break; 844 } 845 case OMPRTL__kmpc_end_taskgroup: { 846 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 847 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 848 llvm::FunctionType *FnTy = 849 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 850 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 851 break; 852 } 853 case OMPRTL__kmpc_push_proc_bind: { 854 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 855 // int proc_bind) 856 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 857 llvm::FunctionType *FnTy = 858 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 859 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 860 break; 861 } 862 case OMPRTL__kmpc_omp_task_with_deps: { 863 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 864 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 865 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 866 llvm::Type *TypeParams[] = { 867 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 868 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 869 llvm::FunctionType *FnTy = 870 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 871 RTLFn = 872 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 873 break; 874 } 875 case OMPRTL__kmpc_omp_wait_deps: { 876 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 877 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 878 // kmp_depend_info_t *noalias_dep_list); 879 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 880 
CGM.Int32Ty, CGM.VoidPtrTy, 881 CGM.Int32Ty, CGM.VoidPtrTy}; 882 llvm::FunctionType *FnTy = 883 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 884 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 885 break; 886 } 887 case OMPRTL__kmpc_cancellationpoint: { 888 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 889 // global_tid, kmp_int32 cncl_kind) 890 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 891 llvm::FunctionType *FnTy = 892 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 893 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 894 break; 895 } 896 case OMPRTL__kmpc_cancel: { 897 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 898 // kmp_int32 cncl_kind) 899 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 900 llvm::FunctionType *FnTy = 901 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 902 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 903 break; 904 } 905 case OMPRTL__tgt_target: { 906 // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t 907 // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t 908 // *arg_types); 909 llvm::Type *TypeParams[] = {CGM.Int32Ty, 910 CGM.VoidPtrTy, 911 CGM.Int32Ty, 912 CGM.VoidPtrPtrTy, 913 CGM.VoidPtrPtrTy, 914 CGM.SizeTy->getPointerTo(), 915 CGM.Int32Ty->getPointerTo()}; 916 llvm::FunctionType *FnTy = 917 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 918 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 919 break; 920 } 921 } 922 return RTLFn; 923 } 924 925 static llvm::Value *getTypeSize(CodeGenFunction &CGF, QualType Ty) { 926 auto &C = CGF.getContext(); 927 llvm::Value *Size = nullptr; 928 auto SizeInChars = C.getTypeSizeInChars(Ty); 929 if (SizeInChars.isZero()) { 930 // getTypeSizeInChars() returns 0 for a VLA. 
931 while (auto *VAT = C.getAsVariableArrayType(Ty)) { 932 llvm::Value *ArraySize; 933 std::tie(ArraySize, Ty) = CGF.getVLASize(VAT); 934 Size = Size ? CGF.Builder.CreateNUWMul(Size, ArraySize) : ArraySize; 935 } 936 SizeInChars = C.getTypeSizeInChars(Ty); 937 assert(!SizeInChars.isZero()); 938 Size = CGF.Builder.CreateNUWMul( 939 Size, llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity())); 940 } else 941 Size = llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity()); 942 return Size; 943 } 944 945 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, 946 bool IVSigned) { 947 assert((IVSize == 32 || IVSize == 64) && 948 "IV size is not compatible with the omp runtime"); 949 auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 950 : "__kmpc_for_static_init_4u") 951 : (IVSigned ? "__kmpc_for_static_init_8" 952 : "__kmpc_for_static_init_8u"); 953 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 954 auto PtrTy = llvm::PointerType::getUnqual(ITy); 955 llvm::Type *TypeParams[] = { 956 getIdentTyPointerTy(), // loc 957 CGM.Int32Ty, // tid 958 CGM.Int32Ty, // schedtype 959 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 960 PtrTy, // p_lower 961 PtrTy, // p_upper 962 PtrTy, // p_stride 963 ITy, // incr 964 ITy // chunk 965 }; 966 llvm::FunctionType *FnTy = 967 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 968 return CGM.CreateRuntimeFunction(FnTy, Name); 969 } 970 971 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, 972 bool IVSigned) { 973 assert((IVSize == 32 || IVSize == 64) && 974 "IV size is not compatible with the omp runtime"); 975 auto Name = 976 IVSize == 32 977 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 978 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 979 auto ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 980 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 981 CGM.Int32Ty, // tid 982 CGM.Int32Ty, // schedtype 983 ITy, // lower 984 ITy, // upper 985 ITy, // stride 986 ITy // chunk 987 }; 988 llvm::FunctionType *FnTy = 989 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 990 return CGM.CreateRuntimeFunction(FnTy, Name); 991 } 992 993 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, 994 bool IVSigned) { 995 assert((IVSize == 32 || IVSize == 64) && 996 "IV size is not compatible with the omp runtime"); 997 auto Name = 998 IVSize == 32 999 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1000 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1001 llvm::Type *TypeParams[] = { 1002 getIdentTyPointerTy(), // loc 1003 CGM.Int32Ty, // tid 1004 }; 1005 llvm::FunctionType *FnTy = 1006 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1007 return CGM.CreateRuntimeFunction(FnTy, Name); 1008 } 1009 1010 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, 1011 bool IVSigned) { 1012 assert((IVSize == 32 || IVSize == 64) && 1013 "IV size is not compatible with the omp runtime"); 1014 auto Name = 1015 IVSize == 32 1016 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1017 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1018 auto ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1019 auto PtrTy = llvm::PointerType::getUnqual(ITy); 1020 llvm::Type *TypeParams[] = { 1021 getIdentTyPointerTy(), // loc 1022 CGM.Int32Ty, // tid 1023 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1024 PtrTy, // p_lower 1025 PtrTy, // p_upper 1026 PtrTy // p_stride 1027 }; 1028 llvm::FunctionType *FnTy = 1029 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1030 return CGM.CreateRuntimeFunction(FnTy, Name); 1031 } 1032 1033 llvm::Constant * 1034 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1035 assert(!CGM.getLangOpts().OpenMPUseTLS || 1036 !CGM.getContext().getTargetInfo().isTLSSupported()); 1037 // Lookup the entry, lazily creating it if necessary. 1038 return getOrCreateInternalVariable(CGM.Int8PtrPtrTy, 1039 Twine(CGM.getMangledName(VD)) + ".cache."); 1040 } 1041 1042 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1043 const VarDecl *VD, 1044 Address VDAddr, 1045 SourceLocation Loc) { 1046 if (CGM.getLangOpts().OpenMPUseTLS && 1047 CGM.getContext().getTargetInfo().isTLSSupported()) 1048 return VDAddr; 1049 1050 auto VarTy = VDAddr.getElementType(); 1051 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1052 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1053 CGM.Int8PtrTy), 1054 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1055 getOrCreateThreadPrivateCache(VD)}; 1056 return Address(CGF.EmitRuntimeCall( 1057 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 1058 VDAddr.getAlignment()); 1059 } 1060 1061 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1062 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1063 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1064 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1065 // library. 
1066 auto OMPLoc = emitUpdateLocation(CGF, Loc); 1067 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1068 OMPLoc); 1069 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1070 // to register constructor/destructor for variable. 1071 llvm::Value *Args[] = {OMPLoc, 1072 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1073 CGM.VoidPtrTy), 1074 Ctor, CopyCtor, Dtor}; 1075 CGF.EmitRuntimeCall( 1076 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 1077 } 1078 1079 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1080 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1081 bool PerformInit, CodeGenFunction *CGF) { 1082 if (CGM.getLangOpts().OpenMPUseTLS && 1083 CGM.getContext().getTargetInfo().isTLSSupported()) 1084 return nullptr; 1085 1086 VD = VD->getDefinition(CGM.getContext()); 1087 if (VD && ThreadPrivateWithDefinition.count(VD) == 0) { 1088 ThreadPrivateWithDefinition.insert(VD); 1089 QualType ASTTy = VD->getType(); 1090 1091 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1092 auto Init = VD->getAnyInitializer(); 1093 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1094 // Generate function that re-emits the declaration's initializer into the 1095 // threadprivate copy of the variable VD 1096 CodeGenFunction CtorCGF(CGM); 1097 FunctionArgList Args; 1098 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 1099 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 1100 Args.push_back(&Dst); 1101 1102 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( 1103 CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(), 1104 /*isVariadic=*/false); 1105 auto FTy = CGM.getTypes().GetFunctionType(FI); 1106 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 1107 FTy, ".__kmpc_global_ctor_.", FI, Loc); 1108 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1109 Args, SourceLocation()); 1110 auto ArgVal = 
CtorCGF.EmitLoadOfScalar( 1111 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1112 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1113 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1114 Arg = CtorCGF.Builder.CreateElementBitCast(Arg, 1115 CtorCGF.ConvertTypeForMem(ASTTy)); 1116 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1117 /*IsInitializer=*/true); 1118 ArgVal = CtorCGF.EmitLoadOfScalar( 1119 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1120 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1121 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1122 CtorCGF.FinishFunction(); 1123 Ctor = Fn; 1124 } 1125 if (VD->getType().isDestructedType() != QualType::DK_none) { 1126 // Generate function that emits destructor call for the threadprivate copy 1127 // of the variable VD 1128 CodeGenFunction DtorCGF(CGM); 1129 FunctionArgList Args; 1130 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 1131 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 1132 Args.push_back(&Dst); 1133 1134 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( 1135 CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(), 1136 /*isVariadic=*/false); 1137 auto FTy = CGM.getTypes().GetFunctionType(FI); 1138 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 1139 FTy, ".__kmpc_global_dtor_.", FI, Loc); 1140 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1141 SourceLocation()); 1142 auto ArgVal = DtorCGF.EmitLoadOfScalar( 1143 DtorCGF.GetAddrOfLocalVar(&Dst), 1144 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1145 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 1146 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1147 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1148 DtorCGF.FinishFunction(); 1149 Dtor = Fn; 1150 } 1151 // Do not emit init function if it is not required. 
1152 if (!Ctor && !Dtor) 1153 return nullptr; 1154 1155 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1156 auto CopyCtorTy = 1157 llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1158 /*isVarArg=*/false)->getPointerTo(); 1159 // Copying constructor for the threadprivate variable. 1160 // Must be NULL - reserved by runtime, but currently it requires that this 1161 // parameter is always NULL. Otherwise it fires assertion. 1162 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1163 if (Ctor == nullptr) { 1164 auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1165 /*isVarArg=*/false)->getPointerTo(); 1166 Ctor = llvm::Constant::getNullValue(CtorTy); 1167 } 1168 if (Dtor == nullptr) { 1169 auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1170 /*isVarArg=*/false)->getPointerTo(); 1171 Dtor = llvm::Constant::getNullValue(DtorTy); 1172 } 1173 if (!CGF) { 1174 auto InitFunctionTy = 1175 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1176 auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( 1177 InitFunctionTy, ".__omp_threadprivate_init_.", 1178 CGM.getTypes().arrangeNullaryFunction()); 1179 CodeGenFunction InitCGF(CGM); 1180 FunctionArgList ArgList; 1181 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1182 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1183 Loc); 1184 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1185 InitCGF.FinishFunction(); 1186 return InitFunction; 1187 } 1188 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1189 } 1190 return nullptr; 1191 } 1192 1193 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen 1194 /// function. 
Here is the logic: 1195 /// if (Cond) { 1196 /// ThenGen(); 1197 /// } else { 1198 /// ElseGen(); 1199 /// } 1200 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 1201 const RegionCodeGenTy &ThenGen, 1202 const RegionCodeGenTy &ElseGen) { 1203 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 1204 1205 // If the condition constant folds and can be elided, try to avoid emitting 1206 // the condition and the dead arm of the if/else. 1207 bool CondConstant; 1208 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 1209 CodeGenFunction::RunCleanupsScope Scope(CGF); 1210 if (CondConstant) { 1211 ThenGen(CGF); 1212 } else { 1213 ElseGen(CGF); 1214 } 1215 return; 1216 } 1217 1218 // Otherwise, the condition did not fold, or we couldn't elide it. Just 1219 // emit the conditional branch. 1220 auto ThenBlock = CGF.createBasicBlock("omp_if.then"); 1221 auto ElseBlock = CGF.createBasicBlock("omp_if.else"); 1222 auto ContBlock = CGF.createBasicBlock("omp_if.end"); 1223 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 1224 1225 // Emit the 'then' code. 1226 CGF.EmitBlock(ThenBlock); 1227 { 1228 CodeGenFunction::RunCleanupsScope ThenScope(CGF); 1229 ThenGen(CGF); 1230 } 1231 CGF.EmitBranch(ContBlock); 1232 // Emit the 'else' code if present. 1233 { 1234 // There is no need to emit line number for unconditional branch. 1235 auto NL = ApplyDebugLocation::CreateEmpty(CGF); 1236 CGF.EmitBlock(ElseBlock); 1237 } 1238 { 1239 CodeGenFunction::RunCleanupsScope ThenScope(CGF); 1240 ElseGen(CGF); 1241 } 1242 { 1243 // There is no need to emit line number for unconditional branch. 1244 auto NL = ApplyDebugLocation::CreateEmpty(CGF); 1245 CGF.EmitBranch(ContBlock); 1246 } 1247 // Emit the continuation block for code after the if. 
1248 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 1249 } 1250 1251 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 1252 llvm::Value *OutlinedFn, 1253 ArrayRef<llvm::Value *> CapturedVars, 1254 const Expr *IfCond) { 1255 auto *RTLoc = emitUpdateLocation(CGF, Loc); 1256 auto &&ThenGen = [this, OutlinedFn, CapturedVars, 1257 RTLoc](CodeGenFunction &CGF) { 1258 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 1259 llvm::Value *Args[] = { 1260 RTLoc, 1261 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 1262 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 1263 llvm::SmallVector<llvm::Value *, 16> RealArgs; 1264 RealArgs.append(std::begin(Args), std::end(Args)); 1265 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 1266 1267 auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call); 1268 CGF.EmitRuntimeCall(RTLFn, RealArgs); 1269 }; 1270 auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc, 1271 Loc](CodeGenFunction &CGF) { 1272 auto ThreadID = getThreadID(CGF, Loc); 1273 // Build calls: 1274 // __kmpc_serialized_parallel(&Loc, GTid); 1275 llvm::Value *Args[] = {RTLoc, ThreadID}; 1276 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), 1277 Args); 1278 1279 // OutlinedFn(>id, &zero, CapturedStruct); 1280 auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc); 1281 Address ZeroAddr = 1282 CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), 1283 /*Name*/ ".zero.addr"); 1284 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 1285 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 1286 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 1287 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 1288 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 1289 CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); 1290 1291 // __kmpc_end_serialized_parallel(&Loc, GTid); 1292 llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), 
ThreadID}; 1293 CGF.EmitRuntimeCall( 1294 createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs); 1295 }; 1296 if (IfCond) { 1297 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 1298 } else { 1299 CodeGenFunction::RunCleanupsScope Scope(CGF); 1300 ThenGen(CGF); 1301 } 1302 } 1303 1304 // If we're inside an (outlined) parallel region, use the region info's 1305 // thread-ID variable (it is passed in a first argument of the outlined function 1306 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 1307 // regular serial code region, get thread ID by calling kmp_int32 1308 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 1309 // return the address of that temp. 1310 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 1311 SourceLocation Loc) { 1312 if (auto OMPRegionInfo = 1313 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 1314 if (OMPRegionInfo->getThreadIDVariable()) 1315 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 1316 1317 auto ThreadID = getThreadID(CGF, Loc); 1318 auto Int32Ty = 1319 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 1320 auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 1321 CGF.EmitStoreOfScalar(ThreadID, 1322 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 1323 1324 return ThreadIDTemp; 1325 } 1326 1327 llvm::Constant * 1328 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, 1329 const llvm::Twine &Name) { 1330 SmallString<256> Buffer; 1331 llvm::raw_svector_ostream Out(Buffer); 1332 Out << Name; 1333 auto RuntimeName = Out.str(); 1334 auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first; 1335 if (Elem.second) { 1336 assert(Elem.second->getType()->getPointerElementType() == Ty && 1337 "OMP internal variable has different type than requested"); 1338 return &*Elem.second; 1339 } 1340 1341 return Elem.second = new llvm::GlobalVariable( 1342 
CGM.getModule(), Ty, /*IsConstant*/ false, 1343 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 1344 Elem.first()); 1345 } 1346 1347 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 1348 llvm::Twine Name(".gomp_critical_user_", CriticalName); 1349 return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); 1350 } 1351 1352 namespace { 1353 template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup { 1354 llvm::Value *Callee; 1355 llvm::Value *Args[N]; 1356 1357 public: 1358 CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs) 1359 : Callee(Callee) { 1360 assert(CleanupArgs.size() == N); 1361 std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args)); 1362 } 1363 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 1364 CGF.EmitRuntimeCall(Callee, Args); 1365 } 1366 }; 1367 } // anonymous namespace 1368 1369 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 1370 StringRef CriticalName, 1371 const RegionCodeGenTy &CriticalOpGen, 1372 SourceLocation Loc) { 1373 // __kmpc_critical(ident_t *, gtid, Lock); 1374 // CriticalOpGen(); 1375 // __kmpc_end_critical(ident_t *, gtid, Lock); 1376 // Prepare arguments and build a call to __kmpc_critical 1377 { 1378 CodeGenFunction::RunCleanupsScope Scope(CGF); 1379 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1380 getCriticalRegionLock(CriticalName)}; 1381 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args); 1382 // Build a call to __kmpc_end_critical 1383 CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( 1384 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical), 1385 llvm::makeArrayRef(Args)); 1386 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 1387 } 1388 } 1389 1390 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond, 1391 OpenMPDirectiveKind Kind, SourceLocation Loc, 1392 const 
RegionCodeGenTy &BodyOpGen) { 1393 llvm::Value *CallBool = CGF.EmitScalarConversion( 1394 IfCond, 1395 CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true), 1396 CGF.getContext().BoolTy, Loc); 1397 1398 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 1399 auto *ContBlock = CGF.createBasicBlock("omp_if.end"); 1400 // Generate the branch (If-stmt) 1401 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 1402 CGF.EmitBlock(ThenBlock); 1403 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen); 1404 // Emit the rest of bblocks/branches 1405 CGF.EmitBranch(ContBlock); 1406 CGF.EmitBlock(ContBlock, true); 1407 } 1408 1409 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 1410 const RegionCodeGenTy &MasterOpGen, 1411 SourceLocation Loc) { 1412 // if(__kmpc_master(ident_t *, gtid)) { 1413 // MasterOpGen(); 1414 // __kmpc_end_master(ident_t *, gtid); 1415 // } 1416 // Prepare arguments and build a call to __kmpc_master 1417 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1418 auto *IsMaster = 1419 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args); 1420 typedef CallEndCleanup<std::extent<decltype(Args)>::value> 1421 MasterCallEndCleanup; 1422 emitIfStmt( 1423 CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void { 1424 CodeGenFunction::RunCleanupsScope Scope(CGF); 1425 CGF.EHStack.pushCleanup<MasterCallEndCleanup>( 1426 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master), 1427 llvm::makeArrayRef(Args)); 1428 MasterOpGen(CGF); 1429 }); 1430 } 1431 1432 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 1433 SourceLocation Loc) { 1434 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 1435 llvm::Value *Args[] = { 1436 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1437 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 1438 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 1439 } 
1440 1441 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 1442 const RegionCodeGenTy &TaskgroupOpGen, 1443 SourceLocation Loc) { 1444 // __kmpc_taskgroup(ident_t *, gtid); 1445 // TaskgroupOpGen(); 1446 // __kmpc_end_taskgroup(ident_t *, gtid); 1447 // Prepare arguments and build a call to __kmpc_taskgroup 1448 { 1449 CodeGenFunction::RunCleanupsScope Scope(CGF); 1450 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1451 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args); 1452 // Build a call to __kmpc_end_taskgroup 1453 CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( 1454 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 1455 llvm::makeArrayRef(Args)); 1456 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 1457 } 1458 } 1459 1460 /// Given an array of pointers to variables, project the address of a 1461 /// given variable. 1462 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 1463 unsigned Index, const VarDecl *Var) { 1464 // Pull out the pointer to the variable. 
1465 Address PtrAddr = 1466 CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize()); 1467 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 1468 1469 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 1470 Addr = CGF.Builder.CreateElementBitCast( 1471 Addr, CGF.ConvertTypeForMem(Var->getType())); 1472 return Addr; 1473 } 1474 1475 static llvm::Value *emitCopyprivateCopyFunction( 1476 CodeGenModule &CGM, llvm::Type *ArgsType, 1477 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 1478 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) { 1479 auto &C = CGM.getContext(); 1480 // void copy_func(void *LHSArg, void *RHSArg); 1481 FunctionArgList Args; 1482 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 1483 C.VoidPtrTy); 1484 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 1485 C.VoidPtrTy); 1486 Args.push_back(&LHSArg); 1487 Args.push_back(&RHSArg); 1488 FunctionType::ExtInfo EI; 1489 auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration( 1490 C.VoidTy, Args, EI, /*isVariadic=*/false); 1491 auto *Fn = llvm::Function::Create( 1492 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 1493 ".omp.copyprivate.copy_func", &CGM.getModule()); 1494 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); 1495 CodeGenFunction CGF(CGM); 1496 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 1497 // Dest = (void*[n])(LHSArg); 1498 // Src = (void*[n])(RHSArg); 1499 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1500 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 1501 ArgsType), CGF.getPointerAlign()); 1502 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1503 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 1504 ArgsType), CGF.getPointerAlign()); 1505 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 1506 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 1507 // ... 
1508 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 1509 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 1510 auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 1511 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 1512 1513 auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 1514 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 1515 1516 auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 1517 QualType Type = VD->getType(); 1518 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 1519 } 1520 CGF.FinishFunction(); 1521 return Fn; 1522 } 1523 1524 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 1525 const RegionCodeGenTy &SingleOpGen, 1526 SourceLocation Loc, 1527 ArrayRef<const Expr *> CopyprivateVars, 1528 ArrayRef<const Expr *> SrcExprs, 1529 ArrayRef<const Expr *> DstExprs, 1530 ArrayRef<const Expr *> AssignmentOps) { 1531 assert(CopyprivateVars.size() == SrcExprs.size() && 1532 CopyprivateVars.size() == DstExprs.size() && 1533 CopyprivateVars.size() == AssignmentOps.size()); 1534 auto &C = CGM.getContext(); 1535 // int32 did_it = 0; 1536 // if(__kmpc_single(ident_t *, gtid)) { 1537 // SingleOpGen(); 1538 // __kmpc_end_single(ident_t *, gtid); 1539 // did_it = 1; 1540 // } 1541 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 1542 // <copy_func>, did_it); 1543 1544 Address DidIt = Address::invalid(); 1545 if (!CopyprivateVars.empty()) { 1546 // int32 did_it = 0; 1547 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1548 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 1549 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 1550 } 1551 // Prepare arguments and build a call to __kmpc_single 1552 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1553 auto *IsSingle = 1554 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args); 
1555 typedef CallEndCleanup<std::extent<decltype(Args)>::value> 1556 SingleCallEndCleanup; 1557 emitIfStmt( 1558 CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void { 1559 CodeGenFunction::RunCleanupsScope Scope(CGF); 1560 CGF.EHStack.pushCleanup<SingleCallEndCleanup>( 1561 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single), 1562 llvm::makeArrayRef(Args)); 1563 SingleOpGen(CGF); 1564 if (DidIt.isValid()) { 1565 // did_it = 1; 1566 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 1567 } 1568 }); 1569 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 1570 // <copy_func>, did_it); 1571 if (DidIt.isValid()) { 1572 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 1573 auto CopyprivateArrayTy = 1574 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 1575 /*IndexTypeQuals=*/0); 1576 // Create a list of all private variables for copyprivate. 1577 Address CopyprivateList = 1578 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 1579 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 1580 Address Elem = CGF.Builder.CreateConstArrayGEP( 1581 CopyprivateList, I, CGF.getPointerSize()); 1582 CGF.Builder.CreateStore( 1583 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1584 CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), 1585 Elem); 1586 } 1587 // Build function that copies private values from single region to all other 1588 // threads in the corresponding parallel region. 
1589 auto *CpyFn = emitCopyprivateCopyFunction( 1590 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 1591 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps); 1592 auto *BufSize = getTypeSize(CGF, CopyprivateArrayTy); 1593 Address CL = 1594 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 1595 CGF.VoidPtrTy); 1596 auto *DidItVal = CGF.Builder.CreateLoad(DidIt); 1597 llvm::Value *Args[] = { 1598 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 1599 getThreadID(CGF, Loc), // i32 <gtid> 1600 BufSize, // size_t <buf_size> 1601 CL.getPointer(), // void *<copyprivate list> 1602 CpyFn, // void (*) (void *, void *) <copy_func> 1603 DidItVal // i32 did_it 1604 }; 1605 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 1606 } 1607 } 1608 1609 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 1610 const RegionCodeGenTy &OrderedOpGen, 1611 SourceLocation Loc, bool IsThreads) { 1612 // __kmpc_ordered(ident_t *, gtid); 1613 // OrderedOpGen(); 1614 // __kmpc_end_ordered(ident_t *, gtid); 1615 // Prepare arguments and build a call to __kmpc_ordered 1616 CodeGenFunction::RunCleanupsScope Scope(CGF); 1617 if (IsThreads) { 1618 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1619 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args); 1620 // Build a call to __kmpc_end_ordered 1621 CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( 1622 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered), 1623 llvm::makeArrayRef(Args)); 1624 } 1625 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 1626 } 1627 1628 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 1629 OpenMPDirectiveKind Kind, bool EmitChecks, 1630 bool ForceSimpleCall) { 1631 // Build call __kmpc_cancel_barrier(loc, thread_id); 1632 // Build call __kmpc_barrier(loc, thread_id); 1633 OpenMPLocationFlags Flags = OMP_IDENT_KMPC; 1634 if (Kind == 
OMPD_for) { 1635 Flags = 1636 static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR); 1637 } else if (Kind == OMPD_sections) { 1638 Flags = static_cast<OpenMPLocationFlags>(Flags | 1639 OMP_IDENT_BARRIER_IMPL_SECTIONS); 1640 } else if (Kind == OMPD_single) { 1641 Flags = 1642 static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE); 1643 } else if (Kind == OMPD_barrier) { 1644 Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL); 1645 } else { 1646 Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL); 1647 } 1648 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 1649 // thread_id); 1650 auto *OMPRegionInfo = 1651 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 1652 // Do not emit barrier call in the single directive emitted in some rare cases 1653 // for sections directives. 1654 if (OMPRegionInfo && OMPRegionInfo->getDirectiveKind() == OMPD_single) 1655 return; 1656 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 1657 getThreadID(CGF, Loc)}; 1658 if (OMPRegionInfo) { 1659 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 1660 auto *Result = CGF.EmitRuntimeCall( 1661 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 1662 if (EmitChecks) { 1663 // if (__kmpc_cancel_barrier()) { 1664 // exit from construct; 1665 // } 1666 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 1667 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 1668 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 1669 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 1670 CGF.EmitBlock(ExitBB); 1671 // exit from construct; 1672 auto CancelDestination = 1673 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 1674 CGF.EmitBranchThroughCleanup(CancelDestination); 1675 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 1676 } 1677 return; 1678 } 1679 } 1680 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 1681 } 1682 1683 /// \brief Schedule 
types for 'omp for' loops (these enumerators are taken from 1684 /// the enum sched_type in kmp.h). 1685 enum OpenMPSchedType { 1686 /// \brief Lower bound for default (unordered) versions. 1687 OMP_sch_lower = 32, 1688 OMP_sch_static_chunked = 33, 1689 OMP_sch_static = 34, 1690 OMP_sch_dynamic_chunked = 35, 1691 OMP_sch_guided_chunked = 36, 1692 OMP_sch_runtime = 37, 1693 OMP_sch_auto = 38, 1694 /// \brief Lower bound for 'ordered' versions. 1695 OMP_ord_lower = 64, 1696 OMP_ord_static_chunked = 65, 1697 OMP_ord_static = 66, 1698 OMP_ord_dynamic_chunked = 67, 1699 OMP_ord_guided_chunked = 68, 1700 OMP_ord_runtime = 69, 1701 OMP_ord_auto = 70, 1702 OMP_sch_default = OMP_sch_static, 1703 }; 1704 1705 /// \brief Map the OpenMP loop schedule to the runtime enumeration. 1706 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 1707 bool Chunked, bool Ordered) { 1708 switch (ScheduleKind) { 1709 case OMPC_SCHEDULE_static: 1710 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 1711 : (Ordered ? OMP_ord_static : OMP_sch_static); 1712 case OMPC_SCHEDULE_dynamic: 1713 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 1714 case OMPC_SCHEDULE_guided: 1715 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 1716 case OMPC_SCHEDULE_runtime: 1717 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 1718 case OMPC_SCHEDULE_auto: 1719 return Ordered ? OMP_ord_auto : OMP_sch_auto; 1720 case OMPC_SCHEDULE_unknown: 1721 assert(!Chunked && "chunk was specified but schedule kind not known"); 1722 return Ordered ? 
OMP_ord_static : OMP_sch_static; 1723 } 1724 llvm_unreachable("Unexpected runtime schedule"); 1725 } 1726 1727 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 1728 bool Chunked) const { 1729 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 1730 return Schedule == OMP_sch_static; 1731 } 1732 1733 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 1734 auto Schedule = 1735 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 1736 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 1737 return Schedule != OMP_sch_static; 1738 } 1739 1740 void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF, 1741 SourceLocation Loc, 1742 OpenMPScheduleClauseKind ScheduleKind, 1743 unsigned IVSize, bool IVSigned, 1744 bool Ordered, llvm::Value *UB, 1745 llvm::Value *Chunk) { 1746 OpenMPSchedType Schedule = 1747 getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered); 1748 assert(Ordered || 1749 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 1750 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked)); 1751 // Call __kmpc_dispatch_init( 1752 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 1753 // kmp_int[32|64] lower, kmp_int[32|64] upper, 1754 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 1755 1756 // If the Chunk was not specified in the clause - use default value 1. 
1757 if (Chunk == nullptr) 1758 Chunk = CGF.Builder.getIntN(IVSize, 1); 1759 llvm::Value *Args[] = { 1760 emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1761 getThreadID(CGF, Loc), 1762 CGF.Builder.getInt32(Schedule), // Schedule type 1763 CGF.Builder.getIntN(IVSize, 0), // Lower 1764 UB, // Upper 1765 CGF.Builder.getIntN(IVSize, 1), // Stride 1766 Chunk // Chunk 1767 }; 1768 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 1769 } 1770 1771 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 1772 SourceLocation Loc, 1773 OpenMPScheduleClauseKind ScheduleKind, 1774 unsigned IVSize, bool IVSigned, 1775 bool Ordered, Address IL, Address LB, 1776 Address UB, Address ST, 1777 llvm::Value *Chunk) { 1778 OpenMPSchedType Schedule = 1779 getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered); 1780 assert(!Ordered); 1781 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 1782 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked); 1783 1784 // Call __kmpc_for_static_init( 1785 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 1786 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 1787 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 1788 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 1789 if (Chunk == nullptr) { 1790 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) && 1791 "expected static non-chunked schedule"); 1792 // If the Chunk was not specified in the clause - use default value 1. 
1793 Chunk = CGF.Builder.getIntN(IVSize, 1); 1794 } else { 1795 assert((Schedule == OMP_sch_static_chunked || 1796 Schedule == OMP_ord_static_chunked) && 1797 "expected static chunked schedule"); 1798 } 1799 llvm::Value *Args[] = { 1800 emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1801 getThreadID(CGF, Loc), 1802 CGF.Builder.getInt32(Schedule), // Schedule type 1803 IL.getPointer(), // &isLastIter 1804 LB.getPointer(), // &LB 1805 UB.getPointer(), // &UB 1806 ST.getPointer(), // &Stride 1807 CGF.Builder.getIntN(IVSize, 1), // Incr 1808 Chunk // Chunk 1809 }; 1810 CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args); 1811 } 1812 1813 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 1814 SourceLocation Loc) { 1815 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 1816 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1817 getThreadID(CGF, Loc)}; 1818 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 1819 Args); 1820 } 1821 1822 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 1823 SourceLocation Loc, 1824 unsigned IVSize, 1825 bool IVSigned) { 1826 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 1827 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1828 getThreadID(CGF, Loc)}; 1829 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 1830 } 1831 1832 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 1833 SourceLocation Loc, unsigned IVSize, 1834 bool IVSigned, Address IL, 1835 Address LB, Address UB, 1836 Address ST) { 1837 // Call __kmpc_dispatch_next( 1838 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 1839 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 1840 // kmp_int[32|64] *p_stride); 1841 llvm::Value *Args[] = { 1842 emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc), 1843 IL.getPointer(), // &isLastIter 1844 LB.getPointer(), // &Lower 1845 
UB.getPointer(), // &Upper 1846 ST.getPointer() // &Stride 1847 }; 1848 llvm::Value *Call = 1849 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 1850 return CGF.EmitScalarConversion( 1851 Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true), 1852 CGF.getContext().BoolTy, Loc); 1853 } 1854 1855 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 1856 llvm::Value *NumThreads, 1857 SourceLocation Loc) { 1858 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 1859 llvm::Value *Args[] = { 1860 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1861 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 1862 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 1863 Args); 1864 } 1865 1866 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 1867 OpenMPProcBindClauseKind ProcBind, 1868 SourceLocation Loc) { 1869 // Constants for proc bind value accepted by the runtime. 1870 enum ProcBindTy { 1871 ProcBindFalse = 0, 1872 ProcBindTrue, 1873 ProcBindMaster, 1874 ProcBindClose, 1875 ProcBindSpread, 1876 ProcBindIntel, 1877 ProcBindDefault 1878 } RuntimeProcBind; 1879 switch (ProcBind) { 1880 case OMPC_PROC_BIND_master: 1881 RuntimeProcBind = ProcBindMaster; 1882 break; 1883 case OMPC_PROC_BIND_close: 1884 RuntimeProcBind = ProcBindClose; 1885 break; 1886 case OMPC_PROC_BIND_spread: 1887 RuntimeProcBind = ProcBindSpread; 1888 break; 1889 case OMPC_PROC_BIND_unknown: 1890 llvm_unreachable("Unsupported proc_bind value."); 1891 } 1892 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 1893 llvm::Value *Args[] = { 1894 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1895 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 1896 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 1897 } 1898 1899 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 1900 SourceLocation Loc) { 
1901 // Build call void __kmpc_flush(ident_t *loc) 1902 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 1903 emitUpdateLocation(CGF, Loc)); 1904 } 1905 1906 namespace { 1907 /// \brief Indexes of fields for type kmp_task_t. 1908 enum KmpTaskTFields { 1909 /// \brief List of shared variables. 1910 KmpTaskTShareds, 1911 /// \brief Task routine. 1912 KmpTaskTRoutine, 1913 /// \brief Partition id for the untied tasks. 1914 KmpTaskTPartId, 1915 /// \brief Function with call of destructors for private variables. 1916 KmpTaskTDestructors, 1917 }; 1918 } // anonymous namespace 1919 1920 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 1921 if (!KmpRoutineEntryPtrTy) { 1922 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 1923 auto &C = CGM.getContext(); 1924 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 1925 FunctionProtoType::ExtProtoInfo EPI; 1926 KmpRoutineEntryPtrQTy = C.getPointerType( 1927 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 1928 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 1929 } 1930 } 1931 1932 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1933 QualType FieldTy) { 1934 auto *Field = FieldDecl::Create( 1935 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1936 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1937 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1938 Field->setAccess(AS_public); 1939 DC->addDecl(Field); 1940 return Field; 1941 } 1942 1943 namespace { 1944 struct PrivateHelpersTy { 1945 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 1946 const VarDecl *PrivateElemInit) 1947 : Original(Original), PrivateCopy(PrivateCopy), 1948 PrivateElemInit(PrivateElemInit) {} 1949 const VarDecl *Original; 1950 const VarDecl *PrivateCopy; 1951 const VarDecl *PrivateElemInit; 1952 }; 1953 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> 
PrivateDataTy; 1954 } // anonymous namespace 1955 1956 static RecordDecl * 1957 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 1958 if (!Privates.empty()) { 1959 auto &C = CGM.getContext(); 1960 // Build struct .kmp_privates_t. { 1961 // /* private vars */ 1962 // }; 1963 auto *RD = C.buildImplicitRecord(".kmp_privates.t"); 1964 RD->startDefinition(); 1965 for (auto &&Pair : Privates) { 1966 auto *VD = Pair.second.Original; 1967 auto Type = VD->getType(); 1968 Type = Type.getNonReferenceType(); 1969 auto *FD = addFieldToRecordDecl(C, RD, Type); 1970 if (VD->hasAttrs()) { 1971 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 1972 E(VD->getAttrs().end()); 1973 I != E; ++I) 1974 FD->addAttr(*I); 1975 } 1976 } 1977 RD->completeDefinition(); 1978 return RD; 1979 } 1980 return nullptr; 1981 } 1982 1983 static RecordDecl * 1984 createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty, 1985 QualType KmpRoutineEntryPointerQTy) { 1986 auto &C = CGM.getContext(); 1987 // Build struct kmp_task_t { 1988 // void * shareds; 1989 // kmp_routine_entry_t routine; 1990 // kmp_int32 part_id; 1991 // kmp_routine_entry_t destructors; 1992 // }; 1993 auto *RD = C.buildImplicitRecord("kmp_task_t"); 1994 RD->startDefinition(); 1995 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1996 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 1997 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1998 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 1999 RD->completeDefinition(); 2000 return RD; 2001 } 2002 2003 static RecordDecl * 2004 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 2005 ArrayRef<PrivateDataTy> Privates) { 2006 auto &C = CGM.getContext(); 2007 // Build struct kmp_task_t_with_privates { 2008 // kmp_task_t task_data; 2009 // .kmp_privates_t. 
privates; 2010 // }; 2011 auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 2012 RD->startDefinition(); 2013 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 2014 if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) { 2015 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 2016 } 2017 RD->completeDefinition(); 2018 return RD; 2019 } 2020 2021 /// \brief Emit a proxy function which accepts kmp_task_t as the second 2022 /// argument. 2023 /// \code 2024 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 2025 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, 2026 /// tt->shareds); 2027 /// return 0; 2028 /// } 2029 /// \endcode 2030 static llvm::Value * 2031 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 2032 QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, 2033 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 2034 QualType SharedsPtrTy, llvm::Value *TaskFunction, 2035 llvm::Value *TaskPrivatesMap) { 2036 auto &C = CGM.getContext(); 2037 FunctionArgList Args; 2038 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); 2039 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, 2040 /*Id=*/nullptr, 2041 KmpTaskTWithPrivatesPtrQTy.withRestrict()); 2042 Args.push_back(&GtidArg); 2043 Args.push_back(&TaskTypeArg); 2044 FunctionType::ExtInfo Info; 2045 auto &TaskEntryFnInfo = 2046 CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info, 2047 /*isVariadic=*/false); 2048 auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 2049 auto *TaskEntry = 2050 llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage, 2051 ".omp_task_entry.", &CGM.getModule()); 2052 CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo); 2053 CodeGenFunction CGF(CGM); 2054 CGF.disableDebugInfo(); 2055 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); 2056 2057 // TaskFunction(gtid, 
tt->task_data.part_id, &tt->privates, task_privates_map, 2058 // tt->task_data.shareds); 2059 auto *GtidParam = CGF.EmitLoadOfScalar( 2060 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 2061 LValue TDBase = emitLoadOfPointerLValue( 2062 CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy); 2063 auto *KmpTaskTWithPrivatesQTyRD = 2064 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 2065 LValue Base = 2066 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 2067 auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 2068 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 2069 auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 2070 auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal(); 2071 2072 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 2073 auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 2074 auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2075 CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(), 2076 CGF.ConvertTypeForMem(SharedsPtrTy)); 2077 2078 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 2079 llvm::Value *PrivatesParam; 2080 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 2081 auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 2082 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2083 PrivatesLVal.getPointer(), CGF.VoidPtrTy); 2084 } else { 2085 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 2086 } 2087 2088 llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam, 2089 TaskPrivatesMap, SharedsParam}; 2090 CGF.EmitCallOrInvoke(TaskFunction, CallArgs); 2091 CGF.EmitStoreThroughLValue( 2092 RValue::get(CGF.Builder.getInt32(/*C=*/0)), 2093 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 2094 CGF.FinishFunction(); 2095 return TaskEntry; 2096 } 2097 2098 static 
llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  // Emits the internal function
  //   kmp_int32 .omp_task_destructor.(kmp_int32 gtid, <task record> *tt)
  // which pushes a destroy cleanup for every field of the task's privates
  // record whose type needs destruction, and returns that function.
  //
  // The privates record is taken from the second field of
  // KmpTaskTWithPrivatesQTy without a presence check.
  // NOTE(review): this presumably relies on callers invoking it only for
  // tasks that have privates - confirm at the call site.
  auto &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
                                /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict());
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  FunctionType::ExtInfo Info;
  auto &DestructorFnInfo =
      CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
                                                    /*isVariadic=*/false);
  auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_destructor.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
                                    DestructorFnInfo);
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args);

  LValue Base = emitLoadOfPointerLValue(
      CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
  auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Step over the leading kmp_task_t field to reach the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (auto DtorKind = Field->getType().isDestructedType()) {
      auto FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// \brief Emit a privates mapping function for correct handling of private and
/// firstprivate
variables. 2145 /// \code 2146 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 2147 /// **noalias priv1,..., <tyn> **noalias privn) { 2148 /// *priv1 = &.privates.priv1; 2149 /// ...; 2150 /// *privn = &.privates.privn; 2151 /// } 2152 /// \endcode 2153 static llvm::Value * 2154 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 2155 ArrayRef<const Expr *> PrivateVars, 2156 ArrayRef<const Expr *> FirstprivateVars, 2157 QualType PrivatesQTy, 2158 ArrayRef<PrivateDataTy> Privates) { 2159 auto &C = CGM.getContext(); 2160 FunctionArgList Args; 2161 ImplicitParamDecl TaskPrivatesArg( 2162 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 2163 C.getPointerType(PrivatesQTy).withConst().withRestrict()); 2164 Args.push_back(&TaskPrivatesArg); 2165 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 2166 unsigned Counter = 1; 2167 for (auto *E: PrivateVars) { 2168 Args.push_back(ImplicitParamDecl::Create( 2169 C, /*DC=*/nullptr, Loc, 2170 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 2171 .withConst() 2172 .withRestrict())); 2173 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2174 PrivateVarsPos[VD] = Counter; 2175 ++Counter; 2176 } 2177 for (auto *E : FirstprivateVars) { 2178 Args.push_back(ImplicitParamDecl::Create( 2179 C, /*DC=*/nullptr, Loc, 2180 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 2181 .withConst() 2182 .withRestrict())); 2183 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2184 PrivateVarsPos[VD] = Counter; 2185 ++Counter; 2186 } 2187 FunctionType::ExtInfo Info; 2188 auto &TaskPrivatesMapFnInfo = 2189 CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info, 2190 /*isVariadic=*/false); 2191 auto *TaskPrivatesMapTy = 2192 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 2193 auto *TaskPrivatesMap = llvm::Function::Create( 2194 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, 2195 ".omp_task_privates_map.", &CGM.getModule()); 2196 
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap, 2197 TaskPrivatesMapFnInfo); 2198 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 2199 CodeGenFunction CGF(CGM); 2200 CGF.disableDebugInfo(); 2201 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 2202 TaskPrivatesMapFnInfo, Args); 2203 2204 // *privi = &.privates.privi; 2205 LValue Base = emitLoadOfPointerLValue( 2206 CGF, CGF.GetAddrOfLocalVar(&TaskPrivatesArg), TaskPrivatesArg.getType()); 2207 auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 2208 Counter = 0; 2209 for (auto *Field : PrivatesQTyRD->fields()) { 2210 auto FieldLVal = CGF.EmitLValueForField(Base, Field); 2211 auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 2212 auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 2213 auto RefLoadLVal = 2214 emitLoadOfPointerLValue(CGF, RefLVal.getAddress(), RefLVal.getType()); 2215 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 2216 ++Counter; 2217 } 2218 CGF.FinishFunction(); 2219 return TaskPrivatesMap; 2220 } 2221 2222 static int array_pod_sort_comparator(const PrivateDataTy *P1, 2223 const PrivateDataTy *P2) { 2224 return P1->first < P2->first ? 1 : (P2->first < P1->first ? 
-1 : 0); 2225 } 2226 2227 void CGOpenMPRuntime::emitTaskCall( 2228 CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, 2229 bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final, 2230 llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, 2231 const Expr *IfCond, ArrayRef<const Expr *> PrivateVars, 2232 ArrayRef<const Expr *> PrivateCopies, 2233 ArrayRef<const Expr *> FirstprivateVars, 2234 ArrayRef<const Expr *> FirstprivateCopies, 2235 ArrayRef<const Expr *> FirstprivateInits, 2236 ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) { 2237 auto &C = CGM.getContext(); 2238 llvm::SmallVector<PrivateDataTy, 8> Privates; 2239 // Aggregate privates and sort them by the alignment. 2240 auto I = PrivateCopies.begin(); 2241 for (auto *E : PrivateVars) { 2242 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2243 Privates.push_back(std::make_pair( 2244 C.getDeclAlign(VD), 2245 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 2246 /*PrivateElemInit=*/nullptr))); 2247 ++I; 2248 } 2249 I = FirstprivateCopies.begin(); 2250 auto IElemInitRef = FirstprivateInits.begin(); 2251 for (auto *E : FirstprivateVars) { 2252 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2253 Privates.push_back(std::make_pair( 2254 C.getDeclAlign(VD), 2255 PrivateHelpersTy( 2256 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 2257 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())))); 2258 ++I, ++IElemInitRef; 2259 } 2260 llvm::array_pod_sort(Privates.begin(), Privates.end(), 2261 array_pod_sort_comparator); 2262 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2263 // Build type kmp_routine_entry_t (if not built yet). 2264 emitKmpRoutineEntryT(KmpInt32Ty); 2265 // Build type kmp_task_t (if not built yet). 
2266 if (KmpTaskTQTy.isNull()) { 2267 KmpTaskTQTy = C.getRecordType( 2268 createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy)); 2269 } 2270 auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 2271 // Build particular struct kmp_task_t for the given task. 2272 auto *KmpTaskTWithPrivatesQTyRD = 2273 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 2274 auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 2275 QualType KmpTaskTWithPrivatesPtrQTy = 2276 C.getPointerType(KmpTaskTWithPrivatesQTy); 2277 auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 2278 auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo(); 2279 auto *KmpTaskTWithPrivatesTySize = getTypeSize(CGF, KmpTaskTWithPrivatesQTy); 2280 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 2281 2282 // Emit initial values for private copies (if any). 2283 llvm::Value *TaskPrivatesMap = nullptr; 2284 auto *TaskPrivatesMapTy = 2285 std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(), 2286 3) 2287 ->getType(); 2288 if (!Privates.empty()) { 2289 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 2290 TaskPrivatesMap = emitTaskPrivateMappingFunction( 2291 CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates); 2292 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2293 TaskPrivatesMap, TaskPrivatesMapTy); 2294 } else { 2295 TaskPrivatesMap = llvm::ConstantPointerNull::get( 2296 cast<llvm::PointerType>(TaskPrivatesMapTy)); 2297 } 2298 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 2299 // kmp_task_t *tt); 2300 auto *TaskEntry = emitProxyTaskFunction( 2301 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy, 2302 KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap); 2303 2304 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 2305 // kmp_int32 flags, size_t sizeof_kmp_task_t, 
size_t sizeof_shareds, 2306 // kmp_routine_entry_t *task_entry); 2307 // Task flags. Format is taken from 2308 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h, 2309 // description of kmp_tasking_flags struct. 2310 const unsigned TiedFlag = 0x1; 2311 const unsigned FinalFlag = 0x2; 2312 unsigned Flags = Tied ? TiedFlag : 0; 2313 auto *TaskFlags = 2314 Final.getPointer() 2315 ? CGF.Builder.CreateSelect(Final.getPointer(), 2316 CGF.Builder.getInt32(FinalFlag), 2317 CGF.Builder.getInt32(/*C=*/0)) 2318 : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0); 2319 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 2320 auto *SharedsSize = getTypeSize(CGF, SharedsTy); 2321 llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), 2322 getThreadID(CGF, Loc), TaskFlags, 2323 KmpTaskTWithPrivatesTySize, SharedsSize, 2324 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2325 TaskEntry, KmpRoutineEntryPtrTy)}; 2326 auto *NewTask = CGF.EmitRuntimeCall( 2327 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 2328 auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2329 NewTask, KmpTaskTWithPrivatesPtrTy); 2330 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 2331 KmpTaskTWithPrivatesQTy); 2332 LValue TDBase = 2333 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 2334 // Fill the data in the resulting kmp_task_t record. 2335 // Copy shareds if there are any. 2336 Address KmpTaskSharedsPtr = Address::invalid(); 2337 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 2338 KmpTaskSharedsPtr = 2339 Address(CGF.EmitLoadOfScalar( 2340 CGF.EmitLValueForField( 2341 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 2342 KmpTaskTShareds)), 2343 Loc), 2344 CGF.getNaturalTypeAlignment(SharedsTy)); 2345 CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy); 2346 } 2347 // Emit initial values for private copies (if any). 
2348 bool NeedsCleanup = false; 2349 if (!Privates.empty()) { 2350 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 2351 auto PrivatesBase = CGF.EmitLValueForField(Base, *FI); 2352 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 2353 LValue SharedsBase; 2354 if (!FirstprivateVars.empty()) { 2355 SharedsBase = CGF.MakeAddrLValue( 2356 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2357 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 2358 SharedsTy); 2359 } 2360 CodeGenFunction::CGCapturedStmtInfo CapturesInfo( 2361 cast<CapturedStmt>(*D.getAssociatedStmt())); 2362 for (auto &&Pair : Privates) { 2363 auto *VD = Pair.second.PrivateCopy; 2364 auto *Init = VD->getAnyInitializer(); 2365 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 2366 if (Init) { 2367 if (auto *Elem = Pair.second.PrivateElemInit) { 2368 auto *OriginalVD = Pair.second.Original; 2369 auto *SharedField = CapturesInfo.lookup(OriginalVD); 2370 auto SharedRefLValue = 2371 CGF.EmitLValueForField(SharedsBase, SharedField); 2372 SharedRefLValue = CGF.MakeAddrLValue( 2373 Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), 2374 SharedRefLValue.getType(), AlignmentSource::Decl); 2375 QualType Type = OriginalVD->getType(); 2376 if (Type->isArrayType()) { 2377 // Initialize firstprivate array. 2378 if (!isa<CXXConstructExpr>(Init) || 2379 CGF.isTrivialInitializer(Init)) { 2380 // Perform simple memcpy. 2381 CGF.EmitAggregateAssign(PrivateLValue.getAddress(), 2382 SharedRefLValue.getAddress(), Type); 2383 } else { 2384 // Initialize firstprivate array using element-by-element 2385 // intialization. 2386 CGF.EmitOMPAggregateAssign( 2387 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), 2388 Type, [&CGF, Elem, Init, &CapturesInfo]( 2389 Address DestElement, Address SrcElement) { 2390 // Clean up any temporaries needed by the initialization. 
2391 CodeGenFunction::OMPPrivateScope InitScope(CGF); 2392 InitScope.addPrivate(Elem, [SrcElement]() -> Address { 2393 return SrcElement; 2394 }); 2395 (void)InitScope.Privatize(); 2396 // Emit initialization for single element. 2397 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 2398 CGF, &CapturesInfo); 2399 CGF.EmitAnyExprToMem(Init, DestElement, 2400 Init->getType().getQualifiers(), 2401 /*IsInitializer=*/false); 2402 }); 2403 } 2404 } else { 2405 CodeGenFunction::OMPPrivateScope InitScope(CGF); 2406 InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { 2407 return SharedRefLValue.getAddress(); 2408 }); 2409 (void)InitScope.Privatize(); 2410 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 2411 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 2412 /*capturedByInit=*/false); 2413 } 2414 } else { 2415 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 2416 } 2417 } 2418 NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType(); 2419 ++FI; 2420 } 2421 } 2422 // Provide pointer to function with destructors for privates. 2423 llvm::Value *DestructorFn = 2424 NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty, 2425 KmpTaskTWithPrivatesPtrQTy, 2426 KmpTaskTWithPrivatesQTy) 2427 : llvm::ConstantPointerNull::get( 2428 cast<llvm::PointerType>(KmpRoutineEntryPtrTy)); 2429 LValue Destructor = CGF.EmitLValueForField( 2430 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors)); 2431 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2432 DestructorFn, KmpRoutineEntryPtrTy), 2433 Destructor); 2434 2435 // Process list of dependences. 2436 Address DependenciesArray = Address::invalid(); 2437 unsigned NumDependencies = Dependences.size(); 2438 if (NumDependencies) { 2439 // Dependence kind for RTL. 
2440 enum RTLDependenceKindTy { DepIn = 1, DepOut = 2, DepInOut = 3 }; 2441 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 2442 RecordDecl *KmpDependInfoRD; 2443 QualType FlagsTy = 2444 C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 2445 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 2446 if (KmpDependInfoTy.isNull()) { 2447 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 2448 KmpDependInfoRD->startDefinition(); 2449 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 2450 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 2451 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 2452 KmpDependInfoRD->completeDefinition(); 2453 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 2454 } else { 2455 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 2456 } 2457 CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy); 2458 // Define type kmp_depend_info[<Dependences.size()>]; 2459 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 2460 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), 2461 ArrayType::Normal, /*IndexTypeQuals=*/0); 2462 // kmp_depend_info[<Dependences.size()>] deps; 2463 DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy); 2464 for (unsigned i = 0; i < NumDependencies; ++i) { 2465 const Expr *E = Dependences[i].second; 2466 auto Addr = CGF.EmitLValue(E); 2467 llvm::Value *Size; 2468 QualType Ty = E->getType(); 2469 if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 2470 LValue UpAddrLVal = 2471 CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false); 2472 llvm::Value *UpAddr = 2473 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); 2474 llvm::Value *LowIntPtr = 2475 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); 2476 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 2477 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 2478 } else 2479 Size = 
getTypeSize(CGF, Ty); 2480 auto Base = CGF.MakeAddrLValue( 2481 CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize), 2482 KmpDependInfoTy); 2483 // deps[i].base_addr = &<Dependences[i].second>; 2484 auto BaseAddrLVal = CGF.EmitLValueForField( 2485 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 2486 CGF.EmitStoreOfScalar( 2487 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), 2488 BaseAddrLVal); 2489 // deps[i].len = sizeof(<Dependences[i].second>); 2490 auto LenLVal = CGF.EmitLValueForField( 2491 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 2492 CGF.EmitStoreOfScalar(Size, LenLVal); 2493 // deps[i].flags = <Dependences[i].first>; 2494 RTLDependenceKindTy DepKind; 2495 switch (Dependences[i].first) { 2496 case OMPC_DEPEND_in: 2497 DepKind = DepIn; 2498 break; 2499 case OMPC_DEPEND_out: 2500 DepKind = DepOut; 2501 break; 2502 case OMPC_DEPEND_inout: 2503 DepKind = DepInOut; 2504 break; 2505 case OMPC_DEPEND_unknown: 2506 llvm_unreachable("Unknown task dependence type"); 2507 } 2508 auto FlagsLVal = CGF.EmitLValueForField( 2509 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 2510 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 2511 FlagsLVal); 2512 } 2513 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2514 CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()), 2515 CGF.VoidPtrTy); 2516 } 2517 2518 // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() 2519 // libcall. 
2520 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2521 // *new_task); 2522 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2523 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2524 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 2525 // list is not empty 2526 auto *ThreadID = getThreadID(CGF, Loc); 2527 auto *UpLoc = emitUpdateLocation(CGF, Loc); 2528 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 2529 llvm::Value *DepTaskArgs[7]; 2530 if (NumDependencies) { 2531 DepTaskArgs[0] = UpLoc; 2532 DepTaskArgs[1] = ThreadID; 2533 DepTaskArgs[2] = NewTask; 2534 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); 2535 DepTaskArgs[4] = DependenciesArray.getPointer(); 2536 DepTaskArgs[5] = CGF.Builder.getInt32(0); 2537 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 2538 } 2539 auto &&ThenCodeGen = [this, NumDependencies, 2540 &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) { 2541 // TODO: add check for untied tasks. 
2542 if (NumDependencies) { 2543 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), 2544 DepTaskArgs); 2545 } else { 2546 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 2547 TaskArgs); 2548 } 2549 }; 2550 typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value> 2551 IfCallEndCleanup; 2552 2553 llvm::Value *DepWaitTaskArgs[6]; 2554 if (NumDependencies) { 2555 DepWaitTaskArgs[0] = UpLoc; 2556 DepWaitTaskArgs[1] = ThreadID; 2557 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); 2558 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 2559 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 2560 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 2561 } 2562 auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 2563 NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) { 2564 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 2565 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2566 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 2567 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 2568 // is specified. 
if (NumDependencies)
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0),
                        TaskArgs);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // The call is registered as a NormalAndEHCleanup instead of being emitted
    // directly, so it is emitted when the enclosing cleanups scope is popped,
    // i.e. after the task entry call below.
    CGF.EHStack.pushCleanup<IfCallEndCleanup>(
        NormalAndEHCleanup,
        createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
        llvm::makeArrayRef(TaskArgs));

    // Call proxy_task_entry(gtid, new_task);
    llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
    CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
  };

  // With an 'if' clause both variants are emitted and selected by the
  // condition; without one only the 'then' variant is emitted.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    ThenCodeGen(CGF);
  }
}

/// \brief Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
///
/// A loop over the elements of the LHS array is emitted. In every iteration
/// \a LHSVar and \a RHSVar are remapped to the current element of their
/// arrays and \a RedOpGen is invoked once.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation. Its
/// local address is the start of the LHS array and also defines the number of
/// elements that are processed.
/// \param RHSVar Variable on the right side of the reduction operation. Its
/// local address is the start of the RHS array, which is walked in lock-step
/// with the LHS array.
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr Forwarded unchanged to \a RedOpGen.
/// \param EExpr Forwarded unchanged to \a RedOpGen.
/// \param UpExpr Forwarded unchanged to \a RedOpGen.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element reduction.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Compute the number of elements and the element type (returned through
  // ElementTy). Only the LHS address is passed to emitArrayLength.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  auto RHSBegin = RHSAddr.getPointer();
  auto LHSBegin = LHSAddr.getPointer();
  // End of the iteration space: LHSBegin advanced by NumElements.
  auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  // NOTE(review): the "omp.arraycpy.*" value names look inherited from an
  // array-copy helper; they are only labels in the emitted IR.
  auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely when the array is empty.
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current RHS/LHS element pointers: the array start when
  // coming from EntryBB, the advanced pointer when coming from the back edge
  // (added at the bottom of the loop).
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the reduction operation for the current pair of elements: LHSVar and
  // RHSVar are temporarily remapped to the element addresses so that RedOpGen
  // operates on single elements.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back edge originates from the current insert block, which is where
  // RedOpGen left the builder.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// \brief Emit the internal function '.omp.reduction.reduction_func' with the
/// signature 'void (void *LHSArg, void *RHSArg)' that performs all reduction
/// operations on the items referenced by its two argument arrays.
/// \param ArgsType LLVM type both 'void *' arguments are cast to before their
/// elements are accessed.
/// \param Privates Expressions parallel to \a ReductionOps; only their types
/// are inspected, to detect array items.
/// \param LHSExprs References to the variables on the left side of the
/// reduction operations (must be DeclRefExprs to VarDecls).
/// \param RHSExprs References to the variables on the right side of the
/// reduction operations (must be DeclRefExprs to VarDecls).
/// \param ReductionOps Reduction expressions, emitted after the LHS/RHS
/// variables have been remapped to the argument arrays' elements.
/// \return The emitted function.
static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
                                          llvm::Type *ArgsType,
                                          ArrayRef<const Expr *> Privates,
                                          ArrayRef<const Expr *> LHSExprs,
                                          ArrayRef<const Expr *> RHSExprs,
                                          ArrayRef<const Expr *> ReductionOps) {
  auto &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  FunctionType::ExtInfo EI;
  auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
      C.VoidTy, Args, EI, /*isVariadic=*/false);
  auto *Fn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
      ".omp.reduction.reduction_func", &CGM.getModule());
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
  // The body is emitted with its own CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // First pass: remap every LHS/RHS variable to the address stored in the
  // corresponding slot of the argument arrays. 'I' counts reduction items,
  // 'Idx' counts array slots; they diverge because an array item occupies two
  // slots (its address followed by its size).
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&]() -> Address {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&]() -> Address {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isArrayType()) {
      // Get array size and emit VLA type.
      // The size is read from the slot that follows the item's address in the
      // LHS array, where it is stored as a pointer-sized value.
      ++Idx;
      Address Elem =
          CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      // Bind the type's size expression to the loaded value while the
      // variably modified type is emitted; the mapping object is local to
      // this block.
      // NOTE(review): the result of getAsVariableArrayType() is dereferenced
      // unchecked and its size expression is cast to OpaqueValueExpr, i.e.
      // every array-typed private is assumed to be such a VLA - confirm.
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF,
          cast<OpaqueValueExpr>(
              CGF.getContext().getAsVariableArrayType(PrivTy)->getSizeExpr()),
          RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Second pass: emit the reduction operations with the remapped variables.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (auto *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                [=](CodeGenFunction &CGF, const Expr *,
                                    const Expr *,
                                    const Expr *) { CGF.EmitIgnoredExpr(E); });
    } else
      // Emit reduction for array subscript or single variable.
      CGF.EmitIgnoredExpr(E);
    ++IPriv, ++ILHS, ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

// Emits the code for a reduction over the given items. \a Privates,
// \a LHSExprs, \a RHSExprs and \a ReductionOps are parallel lists that are
// iterated in lock-step. \a WithNowait selects the '_nowait' runtime entry
// points; \a SimpleReduction requests only the plain reduction operations
// without any runtime calls.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    bool WithNowait, bool SimpleReduction) {
  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
2782 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 2783 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 2784 // RedList, reduce_func, &<lock>)) { 2785 // case 1: 2786 // ... 2787 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 2788 // ... 2789 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 2790 // break; 2791 // case 2: 2792 // ... 2793 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 2794 // ... 2795 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 2796 // break; 2797 // default:; 2798 // } 2799 // 2800 // if SimpleReduction is true, only the next code is generated: 2801 // ... 2802 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 2803 // ... 2804 2805 auto &C = CGM.getContext(); 2806 2807 if (SimpleReduction) { 2808 CodeGenFunction::RunCleanupsScope Scope(CGF); 2809 auto IPriv = Privates.begin(); 2810 auto ILHS = LHSExprs.begin(); 2811 auto IRHS = RHSExprs.begin(); 2812 for (auto *E : ReductionOps) { 2813 if ((*IPriv)->getType()->isArrayType()) { 2814 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 2815 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 2816 EmitOMPAggregateReduction( 2817 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 2818 [=](CodeGenFunction &CGF, const Expr *, const Expr *, 2819 const Expr *) { CGF.EmitIgnoredExpr(E); }); 2820 } else 2821 CGF.EmitIgnoredExpr(E); 2822 ++IPriv, ++ILHS, ++IRHS; 2823 } 2824 return; 2825 } 2826 2827 // 1. Build a list of reduction variables. 2828 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 2829 auto Size = RHSExprs.size(); 2830 for (auto *E : Privates) { 2831 if (E->getType()->isArrayType()) 2832 // Reserve place for array size. 
2833 ++Size; 2834 } 2835 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 2836 QualType ReductionArrayTy = 2837 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 2838 /*IndexTypeQuals=*/0); 2839 Address ReductionList = 2840 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 2841 auto IPriv = Privates.begin(); 2842 unsigned Idx = 0; 2843 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 2844 Address Elem = 2845 CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize()); 2846 CGF.Builder.CreateStore( 2847 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2848 CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), 2849 Elem); 2850 if ((*IPriv)->getType()->isArrayType()) { 2851 // Store array size. 2852 ++Idx; 2853 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, 2854 CGF.getPointerSize()); 2855 CGF.Builder.CreateStore( 2856 CGF.Builder.CreateIntToPtr( 2857 CGF.Builder.CreateIntCast( 2858 CGF.getVLASize(CGF.getContext().getAsVariableArrayType( 2859 (*IPriv)->getType())) 2860 .first, 2861 CGF.SizeTy, /*isSigned=*/false), 2862 CGF.VoidPtrTy), 2863 Elem); 2864 } 2865 } 2866 2867 // 2. Emit reduce_func(). 2868 auto *ReductionFn = emitReductionFunction( 2869 CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 2870 LHSExprs, RHSExprs, ReductionOps); 2871 2872 // 3. Create static kmp_critical_name lock = { 0 }; 2873 auto *Lock = getCriticalRegionLock(".reduction"); 2874 2875 // 4. 
Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 2876 // RedList, reduce_func, &<lock>); 2877 auto *IdentTLoc = emitUpdateLocation( 2878 CGF, Loc, 2879 static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE)); 2880 auto *ThreadId = getThreadID(CGF, Loc); 2881 auto *ReductionArrayTySize = getTypeSize(CGF, ReductionArrayTy); 2882 auto *RL = 2883 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(), 2884 CGF.VoidPtrTy); 2885 llvm::Value *Args[] = { 2886 IdentTLoc, // ident_t *<loc> 2887 ThreadId, // i32 <gtid> 2888 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 2889 ReductionArrayTySize, // size_type sizeof(RedList) 2890 RL, // void *RedList 2891 ReductionFn, // void (*) (void *, void *) <reduce_func> 2892 Lock // kmp_critical_name *&<lock> 2893 }; 2894 auto Res = CGF.EmitRuntimeCall( 2895 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 2896 : OMPRTL__kmpc_reduce), 2897 Args); 2898 2899 // 5. Build switch(res) 2900 auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 2901 auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 2902 2903 // 6. Build case 1: 2904 // ... 2905 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 2906 // ... 2907 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 2908 // break; 2909 auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 2910 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 2911 CGF.EmitBlock(Case1BB); 2912 2913 { 2914 CodeGenFunction::RunCleanupsScope Scope(CGF); 2915 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 2916 llvm::Value *EndArgs[] = { 2917 IdentTLoc, // ident_t *<loc> 2918 ThreadId, // i32 <gtid> 2919 Lock // kmp_critical_name *&<lock> 2920 }; 2921 CGF.EHStack 2922 .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>( 2923 NormalAndEHCleanup, 2924 createRuntimeFunction(WithNowait ? 
OMPRTL__kmpc_end_reduce_nowait 2925 : OMPRTL__kmpc_end_reduce), 2926 llvm::makeArrayRef(EndArgs)); 2927 auto IPriv = Privates.begin(); 2928 auto ILHS = LHSExprs.begin(); 2929 auto IRHS = RHSExprs.begin(); 2930 for (auto *E : ReductionOps) { 2931 if ((*IPriv)->getType()->isArrayType()) { 2932 // Emit reduction for array section. 2933 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 2934 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 2935 EmitOMPAggregateReduction( 2936 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 2937 [=](CodeGenFunction &CGF, const Expr *, const Expr *, 2938 const Expr *) { CGF.EmitIgnoredExpr(E); }); 2939 } else 2940 // Emit reduction for array subscript or single variable. 2941 CGF.EmitIgnoredExpr(E); 2942 ++IPriv, ++ILHS, ++IRHS; 2943 } 2944 } 2945 2946 CGF.EmitBranch(DefaultBB); 2947 2948 // 7. Build case 2: 2949 // ... 2950 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 2951 // ... 2952 // break; 2953 auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 2954 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 2955 CGF.EmitBlock(Case2BB); 2956 2957 { 2958 CodeGenFunction::RunCleanupsScope Scope(CGF); 2959 if (!WithNowait) { 2960 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 2961 llvm::Value *EndArgs[] = { 2962 IdentTLoc, // ident_t *<loc> 2963 ThreadId, // i32 <gtid> 2964 Lock // kmp_critical_name *&<lock> 2965 }; 2966 CGF.EHStack 2967 .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>( 2968 NormalAndEHCleanup, 2969 createRuntimeFunction(OMPRTL__kmpc_end_reduce), 2970 llvm::makeArrayRef(EndArgs)); 2971 } 2972 auto ILHS = LHSExprs.begin(); 2973 auto IRHS = RHSExprs.begin(); 2974 auto IPriv = Privates.begin(); 2975 for (auto *E : ReductionOps) { 2976 const Expr *XExpr = nullptr; 2977 const Expr *EExpr = nullptr; 2978 const Expr *UpExpr = nullptr; 2979 BinaryOperatorKind BO = BO_Comma; 2980 if (auto *BO = dyn_cast<BinaryOperator>(E)) { 2981 if 
(BO->getOpcode() == BO_Assign) { 2982 XExpr = BO->getLHS(); 2983 UpExpr = BO->getRHS(); 2984 } 2985 } 2986 // Try to emit update expression as a simple atomic. 2987 auto *RHSExpr = UpExpr; 2988 if (RHSExpr) { 2989 // Analyze RHS part of the whole expression. 2990 if (auto *ACO = dyn_cast<AbstractConditionalOperator>( 2991 RHSExpr->IgnoreParenImpCasts())) { 2992 // If this is a conditional operator, analyze its condition for 2993 // min/max reduction operator. 2994 RHSExpr = ACO->getCond(); 2995 } 2996 if (auto *BORHS = 2997 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 2998 EExpr = BORHS->getRHS(); 2999 BO = BORHS->getOpcode(); 3000 } 3001 } 3002 if (XExpr) { 3003 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 3004 auto &&AtomicRedGen = [this, BO, VD, IPriv, 3005 Loc](CodeGenFunction &CGF, const Expr *XExpr, 3006 const Expr *EExpr, const Expr *UpExpr) { 3007 LValue X = CGF.EmitLValue(XExpr); 3008 RValue E; 3009 if (EExpr) 3010 E = CGF.EmitAnyExpr(EExpr); 3011 CGF.EmitOMPAtomicSimpleUpdateExpr( 3012 X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc, 3013 [&CGF, UpExpr, VD, IPriv](RValue XRValue) { 3014 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 3015 PrivateScope.addPrivate(VD, [&CGF, VD, XRValue]() -> Address { 3016 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 3017 CGF.EmitStoreThroughLValue( 3018 XRValue, CGF.MakeAddrLValue(LHSTemp, VD->getType())); 3019 return LHSTemp; 3020 }); 3021 (void)PrivateScope.Privatize(); 3022 return CGF.EmitAnyExpr(UpExpr); 3023 }); 3024 }; 3025 if ((*IPriv)->getType()->isArrayType()) { 3026 // Emit atomic reduction for array section. 3027 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 3028 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 3029 AtomicRedGen, XExpr, EExpr, UpExpr); 3030 } else 3031 // Emit atomic reduction for array subscript or single variable. 
3032 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 3033 } else { 3034 // Emit as a critical region. 3035 auto &&CritRedGen = [this, E, Loc](CodeGenFunction &CGF, const Expr *, 3036 const Expr *, const Expr *) { 3037 emitCriticalRegion( 3038 CGF, ".atomic_reduction", 3039 [E](CodeGenFunction &CGF) { CGF.EmitIgnoredExpr(E); }, Loc); 3040 }; 3041 if ((*IPriv)->getType()->isArrayType()) { 3042 auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 3043 auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 3044 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 3045 CritRedGen); 3046 } else 3047 CritRedGen(CGF, nullptr, nullptr, nullptr); 3048 } 3049 ++ILHS, ++IRHS, ++IPriv; 3050 } 3051 } 3052 3053 CGF.EmitBranch(DefaultBB); 3054 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 3055 } 3056 3057 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 3058 SourceLocation Loc) { 3059 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 3060 // global_tid); 3061 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3062 // Ignore return result until untied tasks are supported. 
3063 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 3064 } 3065 3066 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 3067 OpenMPDirectiveKind InnerKind, 3068 const RegionCodeGenTy &CodeGen, 3069 bool HasCancel) { 3070 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 3071 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 3072 } 3073 3074 namespace { 3075 enum RTCancelKind { 3076 CancelNoreq = 0, 3077 CancelParallel = 1, 3078 CancelLoop = 2, 3079 CancelSections = 3, 3080 CancelTaskgroup = 4 3081 }; 3082 } 3083 3084 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 3085 RTCancelKind CancelKind = CancelNoreq; 3086 if (CancelRegion == OMPD_parallel) 3087 CancelKind = CancelParallel; 3088 else if (CancelRegion == OMPD_for) 3089 CancelKind = CancelLoop; 3090 else if (CancelRegion == OMPD_sections) 3091 CancelKind = CancelSections; 3092 else { 3093 assert(CancelRegion == OMPD_taskgroup); 3094 CancelKind = CancelTaskgroup; 3095 } 3096 return CancelKind; 3097 } 3098 3099 void CGOpenMPRuntime::emitCancellationPointCall( 3100 CodeGenFunction &CGF, SourceLocation Loc, 3101 OpenMPDirectiveKind CancelRegion) { 3102 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 3103 // global_tid, kmp_int32 cncl_kind); 3104 if (auto *OMPRegionInfo = 3105 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 3106 if (OMPRegionInfo->getDirectiveKind() == OMPD_single) 3107 return; 3108 if (OMPRegionInfo->hasCancel()) { 3109 llvm::Value *Args[] = { 3110 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3111 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 3112 // Ignore return result until untied tasks are supported. 
auto *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //  __kmpc_cancel_barrier();
      //   exit from construct;
      // }
      auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
      auto *ContBB = CGF.createBasicBlock(".cancel.continue");
      auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // __kmpc_cancel_barrier();
      emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      // The branch goes through the pending cleanups to the cancel
      // destination of the enclosing region's directive kind.
      auto CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

// Emits the code for a 'cancel' construct. Nothing is emitted outside of an
// OpenMP region or when the enclosing region is 'single'. With an 'if' clause
// the cancellation code is emitted only on the 'then' path; the 'else' path
// is empty.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getDirectiveKind() == OMPD_single)
      return;
    auto &&ThenGen = [this, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The result of the call is tested below: a non-null value branches to
      // the cancellation exit.
      auto *Result =
          CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //  __kmpc_cancel_barrier();
      //   exit from construct;
      // }
      auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
      auto *ContBB = CGF.createBasicBlock(".cancel.continue");
      auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // __kmpc_cancel_barrier();
      emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      auto CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond)
      emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &) {});
    else
      ThenGen(CGF);
  }
}

// Emits the outlined function for the captured statement associated with the
// target directive \a D and returns it. The function is generated by a
// separate CodeGenFunction that has a target region info (built from the
// captured statement and \a CodeGen) installed while it runs.
llvm::Value *
CGOpenMPRuntime::emitTargetOutlinedFunction(const OMPExecutableDirective &D,
                                            const RegionCodeGenTy &CodeGen) {
  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());

  // NOTE(review): the meaning of the second constructor argument ('true') is
  // not visible in this file - confirm against CodeGenFunction's constructor.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(CS, /*UseOnlyReferences=*/true);
}

void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Value *OutlinedFn,
                                     const Expr *IfCond, const Expr *Device,
                                     ArrayRef<llvm::Value *> CapturedVars) {
  /// \brief Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags {
    /// \brief Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// \brief Allocate memory on the device and move data from device to host.
3198 OMP_MAP_FROM = 0x02, 3199 }; 3200 3201 enum OpenMPOffloadingReservedDeviceIDs { 3202 /// \brief Device ID if the device was not defined, runtime should get it 3203 /// from environment variables in the spec. 3204 OMP_DEVICEID_UNDEF = -1, 3205 }; 3206 3207 // Fill up the arrays with the all the captured variables. 3208 SmallVector<llvm::Value *, 16> BasePointers; 3209 SmallVector<llvm::Value *, 16> Pointers; 3210 SmallVector<llvm::Value *, 16> Sizes; 3211 SmallVector<unsigned, 16> MapTypes; 3212 3213 bool hasVLACaptures = false; 3214 3215 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 3216 auto RI = CS.getCapturedRecordDecl()->field_begin(); 3217 // auto II = CS.capture_init_begin(); 3218 auto CV = CapturedVars.begin(); 3219 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 3220 CE = CS.capture_end(); 3221 CI != CE; ++CI, ++RI, ++CV) { 3222 StringRef Name; 3223 QualType Ty; 3224 llvm::Value *BasePointer; 3225 llvm::Value *Pointer; 3226 llvm::Value *Size; 3227 unsigned MapType; 3228 3229 if (CI->capturesVariableArrayType()) { 3230 BasePointer = Pointer = *CV; 3231 Size = getTypeSize(CGF, RI->getType()); 3232 hasVLACaptures = true; 3233 // VLA sizes don't need to be copied back from the device. 3234 MapType = OMP_MAP_TO; 3235 } else if (CI->capturesThis()) { 3236 BasePointer = Pointer = *CV; 3237 const PointerType *PtrTy = cast<PointerType>(RI->getType().getTypePtr()); 3238 Size = getTypeSize(CGF, PtrTy->getPointeeType()); 3239 // Default map type. 3240 MapType = OMP_MAP_TO | OMP_MAP_FROM; 3241 } else { 3242 BasePointer = Pointer = *CV; 3243 3244 const ReferenceType *PtrTy = 3245 cast<ReferenceType>(RI->getType().getTypePtr()); 3246 QualType ElementType = PtrTy->getPointeeType(); 3247 Size = getTypeSize(CGF, ElementType); 3248 // Default map type. 
3249 MapType = OMP_MAP_TO | OMP_MAP_FROM; 3250 } 3251 3252 BasePointers.push_back(BasePointer); 3253 Pointers.push_back(Pointer); 3254 Sizes.push_back(Size); 3255 MapTypes.push_back(MapType); 3256 } 3257 3258 // Keep track on whether the host function has to be executed. 3259 auto OffloadErrorQType = 3260 CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true); 3261 auto OffloadError = CGF.MakeAddrLValue( 3262 CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"), 3263 OffloadErrorQType); 3264 CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), 3265 OffloadError); 3266 3267 // Fill up the pointer arrays and transfer execution to the device. 3268 auto &&ThenGen = [this, &BasePointers, &Pointers, &Sizes, &MapTypes, 3269 hasVLACaptures, Device, OffloadError, 3270 OffloadErrorQType](CodeGenFunction &CGF) { 3271 unsigned PointerNumVal = BasePointers.size(); 3272 llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal); 3273 llvm::Value *BasePointersArray; 3274 llvm::Value *PointersArray; 3275 llvm::Value *SizesArray; 3276 llvm::Value *MapTypesArray; 3277 3278 if (PointerNumVal) { 3279 llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true); 3280 QualType PointerArrayType = CGF.getContext().getConstantArrayType( 3281 CGF.getContext().VoidPtrTy, PointerNumAP, ArrayType::Normal, 3282 /*IndexTypeQuals=*/0); 3283 3284 BasePointersArray = 3285 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 3286 PointersArray = 3287 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 3288 3289 // If we don't have any VLA types, we can use a constant array for the map 3290 // sizes, otherwise we need to fill up the arrays as we do for the 3291 // pointers. 
3292 if (hasVLACaptures) { 3293 QualType SizeArrayType = CGF.getContext().getConstantArrayType( 3294 CGF.getContext().getSizeType(), PointerNumAP, ArrayType::Normal, 3295 /*IndexTypeQuals=*/0); 3296 SizesArray = 3297 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 3298 } else { 3299 // We expect all the sizes to be constant, so we collect them to create 3300 // a constant array. 3301 SmallVector<llvm::Constant *, 16> ConstSizes; 3302 for (auto S : Sizes) 3303 ConstSizes.push_back(cast<llvm::Constant>(S)); 3304 3305 auto *SizesArrayInit = llvm::ConstantArray::get( 3306 llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); 3307 auto *SizesArrayGbl = new llvm::GlobalVariable( 3308 CGM.getModule(), SizesArrayInit->getType(), 3309 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 3310 SizesArrayInit, ".offload_sizes"); 3311 SizesArrayGbl->setUnnamedAddr(true); 3312 SizesArray = SizesArrayGbl; 3313 } 3314 3315 // The map types are always constant so we don't need to generate code to 3316 // fill arrays. Instead, we create an array constant. 
3317 llvm::Constant *MapTypesArrayInit = 3318 llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); 3319 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 3320 CGM.getModule(), MapTypesArrayInit->getType(), 3321 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 3322 MapTypesArrayInit, ".offload_maptypes"); 3323 MapTypesArrayGbl->setUnnamedAddr(true); 3324 MapTypesArray = MapTypesArrayGbl; 3325 3326 for (unsigned i = 0; i < PointerNumVal; ++i) { 3327 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 3328 llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), 3329 BasePointersArray, 0, i); 3330 Address BPAddr(BP, CGM.getContext().getTypeAlignInChars( 3331 CGM.getContext().VoidPtrTy)); 3332 CGF.Builder.CreateStore( 3333 CGF.Builder.CreateBitCast(BasePointers[i], CGM.VoidPtrTy), BPAddr); 3334 3335 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 3336 llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 3337 0, i); 3338 Address PAddr(P, CGM.getContext().getTypeAlignInChars( 3339 CGM.getContext().VoidPtrTy)); 3340 CGF.Builder.CreateStore( 3341 CGF.Builder.CreateBitCast(Pointers[i], CGM.VoidPtrTy), PAddr); 3342 3343 if (hasVLACaptures) { 3344 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 3345 llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, 3346 /*Idx0=*/0, 3347 /*Idx1=*/i); 3348 Address SAddr(S, CGM.getContext().getTypeAlignInChars( 3349 CGM.getContext().getSizeType())); 3350 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast( 3351 Sizes[i], CGM.SizeTy, /*isSigned=*/true), 3352 SAddr); 3353 } 3354 } 3355 3356 BasePointersArray = CGF.Builder.CreateConstInBoundsGEP2_32( 3357 llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray, 3358 /*Idx0=*/0, /*Idx1=*/0); 3359 PointersArray = CGF.Builder.CreateConstInBoundsGEP2_32( 3360 llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 3361 /*Idx0=*/0, 3362 /*Idx1=*/0); 3363 SizesArray = CGF.Builder.CreateConstInBoundsGEP2_32( 3364 
llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, 3365 /*Idx0=*/0, /*Idx1=*/0); 3366 MapTypesArray = CGF.Builder.CreateConstInBoundsGEP2_32( 3367 llvm::ArrayType::get(CGM.Int32Ty, PointerNumVal), MapTypesArray, 3368 /*Idx0=*/0, 3369 /*Idx1=*/0); 3370 3371 } else { 3372 BasePointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 3373 PointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 3374 SizesArray = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); 3375 MapTypesArray = 3376 llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()); 3377 } 3378 3379 // On top of the arrays that were filled up, the target offloading call 3380 // takes as arguments the device id as well as the host pointer. The host 3381 // pointer is used by the runtime library to identify the current target 3382 // region, so it only has to be unique and not necessarily point to 3383 // anything. It could be the pointer to the outlined function that 3384 // implements the target region, but we aren't using that so that the 3385 // compiler doesn't need to keep that, and could therefore inline the host 3386 // function if proven worthwhile during optimization. 3387 3388 llvm::Value *HostPtr = new llvm::GlobalVariable( 3389 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 3390 llvm::GlobalValue::PrivateLinkage, 3391 llvm::Constant::getNullValue(CGM.Int8Ty), ".offload_hstptr"); 3392 3393 // Emit device ID if any. 
3394 llvm::Value *DeviceID; 3395 if (Device) 3396 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 3397 CGM.Int32Ty, /*isSigned=*/true); 3398 else 3399 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 3400 3401 llvm::Value *OffloadingArgs[] = { 3402 DeviceID, HostPtr, PointerNum, BasePointersArray, 3403 PointersArray, SizesArray, MapTypesArray}; 3404 auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target), 3405 OffloadingArgs); 3406 3407 CGF.EmitStoreOfScalar(Return, OffloadError); 3408 }; 3409 3410 if (IfCond) { 3411 // Notify that the host version must be executed. 3412 auto &&ElseGen = [this, OffloadError, 3413 OffloadErrorQType](CodeGenFunction &CGF) { 3414 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u), 3415 OffloadError); 3416 }; 3417 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 3418 } else { 3419 CodeGenFunction::RunCleanupsScope Scope(CGF); 3420 ThenGen(CGF); 3421 } 3422 3423 // Check the error code and execute the host version if required. 3424 auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed"); 3425 auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont"); 3426 auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation()); 3427 auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal); 3428 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 3429 3430 CGF.EmitBlock(OffloadFailedBlock); 3431 CGF.Builder.CreateCall(OutlinedFn, BasePointers); 3432 CGF.EmitBranch(OffloadContBlock); 3433 3434 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 3435 return; 3436 } 3437