1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This provides a class for OpenMP runtime code generation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGOpenMPRuntime.h" 15 #include "CodeGenFunction.h" 16 #include "CGCleanup.h" 17 #include "clang/AST/Decl.h" 18 #include "clang/AST/StmtOpenMP.h" 19 #include "llvm/ADT/ArrayRef.h" 20 #include "llvm/IR/CallSite.h" 21 #include "llvm/IR/DerivedTypes.h" 22 #include "llvm/IR/GlobalValue.h" 23 #include "llvm/IR/Value.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include <cassert> 26 27 using namespace clang; 28 using namespace CodeGen; 29 30 namespace { 31 /// \brief Base class for handling code generation inside OpenMP regions. 32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 33 public: 34 /// \brief Kinds of OpenMP regions used in codegen. 35 enum CGOpenMPRegionKind { 36 /// \brief Region with outlined function for standalone 'parallel' 37 /// directive. 38 ParallelOutlinedRegion, 39 /// \brief Region with outlined function for standalone 'task' directive. 40 TaskOutlinedRegion, 41 /// \brief Region for constructs that do not require function outlining, 42 /// like 'for', 'sections', 'atomic' etc. directives. 
43 InlinedRegion, 44 }; 45 46 CGOpenMPRegionInfo(const CapturedStmt &CS, 47 const CGOpenMPRegionKind RegionKind, 48 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 49 bool HasCancel) 50 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 51 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 52 53 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 54 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 55 bool HasCancel) 56 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 57 Kind(Kind), HasCancel(HasCancel) {} 58 59 /// \brief Get a variable or parameter for storing global thread id 60 /// inside OpenMP construct. 61 virtual const VarDecl *getThreadIDVariable() const = 0; 62 63 /// \brief Emit the captured statement body. 64 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 65 66 /// \brief Get an LValue for the current ThreadID variable. 67 /// \return LValue for thread id variable. This LValue always has type int32*. 68 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 69 70 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 71 72 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 73 74 bool hasCancel() const { return HasCancel; } 75 76 static bool classof(const CGCapturedStmtInfo *Info) { 77 return Info->getKind() == CR_OpenMP; 78 } 79 80 protected: 81 CGOpenMPRegionKind RegionKind; 82 const RegionCodeGenTy &CodeGen; 83 OpenMPDirectiveKind Kind; 84 bool HasCancel; 85 }; 86 87 /// \brief API for captured statement code generation in OpenMP constructs. 
88 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo { 89 public: 90 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 91 const RegionCodeGenTy &CodeGen, 92 OpenMPDirectiveKind Kind, bool HasCancel) 93 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 94 HasCancel), 95 ThreadIDVar(ThreadIDVar) { 96 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 97 } 98 /// \brief Get a variable or parameter for storing global thread id 99 /// inside OpenMP construct. 100 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 101 102 /// \brief Get the name of the capture helper. 103 StringRef getHelperName() const override { return ".omp_outlined."; } 104 105 static bool classof(const CGCapturedStmtInfo *Info) { 106 return CGOpenMPRegionInfo::classof(Info) && 107 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 108 ParallelOutlinedRegion; 109 } 110 111 private: 112 /// \brief A variable or parameter storing global thread id for OpenMP 113 /// constructs. 114 const VarDecl *ThreadIDVar; 115 }; 116 117 /// \brief API for captured statement code generation in OpenMP constructs. 118 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo { 119 public: 120 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 121 const VarDecl *ThreadIDVar, 122 const RegionCodeGenTy &CodeGen, 123 OpenMPDirectiveKind Kind, bool HasCancel) 124 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 125 ThreadIDVar(ThreadIDVar) { 126 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 127 } 128 /// \brief Get a variable or parameter for storing global thread id 129 /// inside OpenMP construct. 130 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 131 132 /// \brief Get an LValue for the current ThreadID variable. 133 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 134 135 /// \brief Get the name of the capture helper. 
136 StringRef getHelperName() const override { return ".omp_outlined."; } 137 138 static bool classof(const CGCapturedStmtInfo *Info) { 139 return CGOpenMPRegionInfo::classof(Info) && 140 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 141 TaskOutlinedRegion; 142 } 143 144 private: 145 /// \brief A variable or parameter storing global thread id for OpenMP 146 /// constructs. 147 const VarDecl *ThreadIDVar; 148 }; 149 150 /// \brief API for inlined captured statement code generation in OpenMP 151 /// constructs. 152 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 153 public: 154 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 155 const RegionCodeGenTy &CodeGen, 156 OpenMPDirectiveKind Kind, bool HasCancel) 157 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 158 OldCSI(OldCSI), 159 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 160 // \brief Retrieve the value of the context parameter. 161 llvm::Value *getContextValue() const override { 162 if (OuterRegionInfo) 163 return OuterRegionInfo->getContextValue(); 164 llvm_unreachable("No context value for inlined OpenMP region"); 165 } 166 void setContextValue(llvm::Value *V) override { 167 if (OuterRegionInfo) { 168 OuterRegionInfo->setContextValue(V); 169 return; 170 } 171 llvm_unreachable("No context value for inlined OpenMP region"); 172 } 173 /// \brief Lookup the captured field decl for a variable. 174 const FieldDecl *lookup(const VarDecl *VD) const override { 175 if (OuterRegionInfo) 176 return OuterRegionInfo->lookup(VD); 177 // If there is no outer outlined region,no need to lookup in a list of 178 // captured variables, we can use the original one. 179 return nullptr; 180 } 181 FieldDecl *getThisFieldDecl() const override { 182 if (OuterRegionInfo) 183 return OuterRegionInfo->getThisFieldDecl(); 184 return nullptr; 185 } 186 /// \brief Get a variable or parameter for storing global thread id 187 /// inside OpenMP construct. 
188 const VarDecl *getThreadIDVariable() const override { 189 if (OuterRegionInfo) 190 return OuterRegionInfo->getThreadIDVariable(); 191 return nullptr; 192 } 193 194 /// \brief Get the name of the capture helper. 195 StringRef getHelperName() const override { 196 if (auto *OuterRegionInfo = getOldCSI()) 197 return OuterRegionInfo->getHelperName(); 198 llvm_unreachable("No helper name for inlined OpenMP construct"); 199 } 200 201 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 202 203 static bool classof(const CGCapturedStmtInfo *Info) { 204 return CGOpenMPRegionInfo::classof(Info) && 205 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 206 } 207 208 private: 209 /// \brief CodeGen info about outer OpenMP region. 210 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 211 CGOpenMPRegionInfo *OuterRegionInfo; 212 }; 213 214 /// \brief RAII for emitting code of OpenMP constructs. 215 class InlinedOpenMPRegionRAII { 216 CodeGenFunction &CGF; 217 218 public: 219 /// \brief Constructs region for combined constructs. 220 /// \param CodeGen Code generation sequence for combined directives. Includes 221 /// a list of functions used for code generation of implicitly inlined 222 /// regions. 223 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 224 OpenMPDirectiveKind Kind, bool HasCancel) 225 : CGF(CGF) { 226 // Start emission for the construct. 227 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 228 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 229 } 230 ~InlinedOpenMPRegionRAII() { 231 // Restore original CapturedStmtInfo only if we're done with code emission. 
232 auto *OldCSI = 233 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 234 delete CGF.CapturedStmtInfo; 235 CGF.CapturedStmtInfo = OldCSI; 236 } 237 }; 238 239 } // anonymous namespace 240 241 static LValue emitLoadOfPointerLValue(CodeGenFunction &CGF, Address PtrAddr, 242 QualType Ty) { 243 AlignmentSource Source; 244 CharUnits Align = CGF.getNaturalPointeeTypeAlignment(Ty, &Source); 245 return CGF.MakeAddrLValue(Address(CGF.Builder.CreateLoad(PtrAddr), Align), 246 Ty->getPointeeType(), Source); 247 } 248 249 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 250 return emitLoadOfPointerLValue(CGF, 251 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 252 getThreadIDVariable()->getType()); 253 } 254 255 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 256 // 1.2.2 OpenMP Language Terminology 257 // Structured block - An executable statement with a single entry at the 258 // top and a single exit at the bottom. 259 // The point of exit cannot be a branch out of the structured block. 260 // longjmp() and throw() must not violate the entry/exit criteria. 261 CGF.EHStack.pushTerminate(); 262 { 263 CodeGenFunction::RunCleanupsScope Scope(CGF); 264 CodeGen(CGF); 265 } 266 CGF.EHStack.popTerminate(); 267 } 268 269 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 270 CodeGenFunction &CGF) { 271 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 272 getThreadIDVariable()->getType(), 273 AlignmentSource::Decl); 274 } 275 276 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) 277 : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) { 278 IdentTy = llvm::StructType::create( 279 "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, 280 CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, 281 CGM.Int8PtrTy /* psource */, nullptr); 282 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
283 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 284 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 285 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 286 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 287 } 288 289 void CGOpenMPRuntime::clear() { 290 InternalVars.clear(); 291 } 292 293 // Layout information for ident_t. 294 static CharUnits getIdentAlign(CodeGenModule &CGM) { 295 return CGM.getPointerAlign(); 296 } 297 static CharUnits getIdentSize(CodeGenModule &CGM) { 298 assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign())); 299 return CharUnits::fromQuantity(16) + CGM.getPointerSize(); 300 } 301 static CharUnits getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field) { 302 // All the fields except the last are i32, so this works beautifully. 303 return unsigned(Field) * CharUnits::fromQuantity(4); 304 } 305 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr, 306 CGOpenMPRuntime::IdentFieldIndex Field, 307 const llvm::Twine &Name = "") { 308 auto Offset = getOffsetOfIdentField(Field); 309 return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name); 310 } 311 312 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( 313 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 314 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 315 assert(ThreadIDVar->getType()->isPointerType() && 316 "thread id variable must be of type kmp_int32 *"); 317 const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 318 CodeGenFunction CGF(CGM, true); 319 bool HasCancel = false; 320 if (auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 321 HasCancel = OPD->hasCancel(); 322 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 323 HasCancel = OPSD->hasCancel(); 324 else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 325 HasCancel = OPFD->hasCancel(); 326 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, 
CodeGen, InnermostKind, 327 HasCancel); 328 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 329 return CGF.GenerateOpenMPCapturedStmtFunction(*CS); 330 } 331 332 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( 333 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 334 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 335 assert(!ThreadIDVar->getType()->isPointerType() && 336 "thread id variable must be of type kmp_int32 for tasks"); 337 auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 338 CodeGenFunction CGF(CGM, true); 339 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 340 InnermostKind, 341 cast<OMPTaskDirective>(D).hasCancel()); 342 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 343 return CGF.GenerateCapturedStmtFunction(*CS); 344 } 345 346 Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { 347 CharUnits Align = getIdentAlign(CGM); 348 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); 349 if (!Entry) { 350 if (!DefaultOpenMPPSource) { 351 // Initialize default location for psource field of ident_t structure of 352 // all ident_t objects. Format is ";file;function;line;column;;". 
353 // Taken from 354 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c 355 DefaultOpenMPPSource = 356 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 357 DefaultOpenMPPSource = 358 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 359 } 360 auto DefaultOpenMPLocation = new llvm::GlobalVariable( 361 CGM.getModule(), IdentTy, /*isConstant*/ true, 362 llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr); 363 DefaultOpenMPLocation->setUnnamedAddr(true); 364 DefaultOpenMPLocation->setAlignment(Align.getQuantity()); 365 366 llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true); 367 llvm::Constant *Values[] = {Zero, 368 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 369 Zero, Zero, DefaultOpenMPPSource}; 370 llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values); 371 DefaultOpenMPLocation->setInitializer(Init); 372 OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation; 373 } 374 return Address(Entry, Align); 375 } 376 377 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 378 SourceLocation Loc, 379 OpenMPLocationFlags Flags) { 380 // If no debug info is generated - return global default location. 381 if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo || 382 Loc.isInvalid()) 383 return getOrCreateDefaultLocation(Flags).getPointer(); 384 385 assert(CGF.CurFn && "No function in current CodeGenFunction."); 386 387 Address LocValue = Address::invalid(); 388 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 389 if (I != OpenMPLocThreadIDMap.end()) 390 LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM)); 391 392 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 393 // GetOpenMPThreadID was called before this routine. 
394 if (!LocValue.isValid()) { 395 // Generate "ident_t .kmpc_loc.addr;" 396 Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM), 397 ".kmpc_loc.addr"); 398 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 399 Elem.second.DebugLoc = AI.getPointer(); 400 LocValue = AI; 401 402 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 403 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 404 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 405 CGM.getSize(getIdentSize(CGF.CGM))); 406 } 407 408 // char **psource = &.kmpc_loc_<flags>.addr.psource; 409 Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource); 410 411 auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 412 if (OMPDebugLoc == nullptr) { 413 SmallString<128> Buffer2; 414 llvm::raw_svector_ostream OS2(Buffer2); 415 // Build debug location 416 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 417 OS2 << ";" << PLoc.getFilename() << ";"; 418 if (const FunctionDecl *FD = 419 dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) { 420 OS2 << FD->getQualifiedNameAsString(); 421 } 422 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 423 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 424 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 425 } 426 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 427 CGF.Builder.CreateStore(OMPDebugLoc, PSource); 428 429 // Our callers always pass this to a runtime function, so for 430 // convenience, go ahead and return a naked pointer. 431 return LocValue.getPointer(); 432 } 433 434 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 435 SourceLocation Loc) { 436 assert(CGF.CurFn && "No function in current CodeGenFunction."); 437 438 llvm::Value *ThreadID = nullptr; 439 // Check whether we've already cached a load of the thread id in this 440 // function. 
441 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 442 if (I != OpenMPLocThreadIDMap.end()) { 443 ThreadID = I->second.ThreadID; 444 if (ThreadID != nullptr) 445 return ThreadID; 446 } 447 if (auto OMPRegionInfo = 448 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 449 if (OMPRegionInfo->getThreadIDVariable()) { 450 // Check if this an outlined function with thread id passed as argument. 451 auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 452 ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); 453 // If value loaded in entry block, cache it and use it everywhere in 454 // function. 455 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 456 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 457 Elem.second.ThreadID = ThreadID; 458 } 459 return ThreadID; 460 } 461 } 462 463 // This is not an outlined function region - need to call __kmpc_int32 464 // kmpc_global_thread_num(ident_t *loc). 465 // Generate thread id value and cache this value for use across the 466 // function. 
467 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 468 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 469 ThreadID = 470 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 471 emitUpdateLocation(CGF, Loc)); 472 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 473 Elem.second.ThreadID = ThreadID; 474 return ThreadID; 475 } 476 477 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 478 assert(CGF.CurFn && "No function in current CodeGenFunction."); 479 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 480 OpenMPLocThreadIDMap.erase(CGF.CurFn); 481 } 482 483 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 484 return llvm::PointerType::getUnqual(IdentTy); 485 } 486 487 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 488 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 489 } 490 491 llvm::Constant * 492 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { 493 llvm::Constant *RTLFn = nullptr; 494 switch (Function) { 495 case OMPRTL__kmpc_fork_call: { 496 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 497 // microtask, ...); 498 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 499 getKmpc_MicroPointerTy()}; 500 llvm::FunctionType *FnTy = 501 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 502 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 503 break; 504 } 505 case OMPRTL__kmpc_global_thread_num: { 506 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 507 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 508 llvm::FunctionType *FnTy = 509 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 510 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 511 break; 512 } 513 case OMPRTL__kmpc_threadprivate_cached: { 514 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 515 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 516 llvm::Type *TypeParams[] = 
{getIdentTyPointerTy(), CGM.Int32Ty, 517 CGM.VoidPtrTy, CGM.SizeTy, 518 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 519 llvm::FunctionType *FnTy = 520 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 521 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 522 break; 523 } 524 case OMPRTL__kmpc_critical: { 525 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 526 // kmp_critical_name *crit); 527 llvm::Type *TypeParams[] = { 528 getIdentTyPointerTy(), CGM.Int32Ty, 529 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 530 llvm::FunctionType *FnTy = 531 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 532 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 533 break; 534 } 535 case OMPRTL__kmpc_threadprivate_register: { 536 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 537 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 538 // typedef void *(*kmpc_ctor)(void *); 539 auto KmpcCtorTy = 540 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 541 /*isVarArg*/ false)->getPointerTo(); 542 // typedef void *(*kmpc_cctor)(void *, void *); 543 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 544 auto KmpcCopyCtorTy = 545 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 546 /*isVarArg*/ false)->getPointerTo(); 547 // typedef void (*kmpc_dtor)(void *); 548 auto KmpcDtorTy = 549 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 550 ->getPointerTo(); 551 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 552 KmpcCopyCtorTy, KmpcDtorTy}; 553 auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 554 /*isVarArg*/ false); 555 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 556 break; 557 } 558 case OMPRTL__kmpc_end_critical: { 559 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 560 // kmp_critical_name *crit); 561 llvm::Type *TypeParams[] = { 
562 getIdentTyPointerTy(), CGM.Int32Ty, 563 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 564 llvm::FunctionType *FnTy = 565 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 566 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 567 break; 568 } 569 case OMPRTL__kmpc_cancel_barrier: { 570 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 571 // global_tid); 572 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 573 llvm::FunctionType *FnTy = 574 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 575 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 576 break; 577 } 578 case OMPRTL__kmpc_barrier: { 579 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 580 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 581 llvm::FunctionType *FnTy = 582 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 583 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 584 break; 585 } 586 case OMPRTL__kmpc_for_static_fini: { 587 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 588 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 589 llvm::FunctionType *FnTy = 590 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 591 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 592 break; 593 } 594 case OMPRTL__kmpc_push_num_threads: { 595 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 596 // kmp_int32 num_threads) 597 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 598 CGM.Int32Ty}; 599 llvm::FunctionType *FnTy = 600 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 601 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 602 break; 603 } 604 case OMPRTL__kmpc_serialized_parallel: { 605 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 606 // global_tid); 607 
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 608 llvm::FunctionType *FnTy = 609 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 610 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 611 break; 612 } 613 case OMPRTL__kmpc_end_serialized_parallel: { 614 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 615 // global_tid); 616 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 617 llvm::FunctionType *FnTy = 618 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 619 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 620 break; 621 } 622 case OMPRTL__kmpc_flush: { 623 // Build void __kmpc_flush(ident_t *loc); 624 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 625 llvm::FunctionType *FnTy = 626 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 627 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 628 break; 629 } 630 case OMPRTL__kmpc_master: { 631 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 632 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 633 llvm::FunctionType *FnTy = 634 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 635 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 636 break; 637 } 638 case OMPRTL__kmpc_end_master: { 639 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 640 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 641 llvm::FunctionType *FnTy = 642 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 643 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 644 break; 645 } 646 case OMPRTL__kmpc_omp_taskyield: { 647 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 648 // int end_part); 649 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 650 llvm::FunctionType *FnTy = 651 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 652 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 653 break; 654 } 655 case OMPRTL__kmpc_single: { 656 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 657 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 658 llvm::FunctionType *FnTy = 659 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 660 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 661 break; 662 } 663 case OMPRTL__kmpc_end_single: { 664 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 665 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 666 llvm::FunctionType *FnTy = 667 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 668 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 669 break; 670 } 671 case OMPRTL__kmpc_omp_task_alloc: { 672 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 673 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 674 // kmp_routine_entry_t *task_entry); 675 assert(KmpRoutineEntryPtrTy != nullptr && 676 "Type kmp_routine_entry_t must be created."); 677 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 678 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 679 // Return void * and then cast to particular kmp_task_t type. 
680 llvm::FunctionType *FnTy = 681 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 682 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 683 break; 684 } 685 case OMPRTL__kmpc_omp_task: { 686 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 687 // *new_task); 688 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 689 CGM.VoidPtrTy}; 690 llvm::FunctionType *FnTy = 691 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 692 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 693 break; 694 } 695 case OMPRTL__kmpc_copyprivate: { 696 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 697 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 698 // kmp_int32 didit); 699 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 700 auto *CpyFnTy = 701 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 702 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 703 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 704 CGM.Int32Ty}; 705 llvm::FunctionType *FnTy = 706 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 707 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 708 break; 709 } 710 case OMPRTL__kmpc_reduce: { 711 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 712 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 713 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 714 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 715 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 716 /*isVarArg=*/false); 717 llvm::Type *TypeParams[] = { 718 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 719 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 720 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 721 llvm::FunctionType *FnTy = 722 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 723 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 724 break; 725 } 726 case OMPRTL__kmpc_reduce_nowait: { 727 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 728 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 729 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 730 // *lck); 731 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 732 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 733 /*isVarArg=*/false); 734 llvm::Type *TypeParams[] = { 735 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 736 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 737 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 738 llvm::FunctionType *FnTy = 739 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 740 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 741 break; 742 } 743 case OMPRTL__kmpc_end_reduce: { 744 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 745 // kmp_critical_name *lck); 746 llvm::Type *TypeParams[] = { 747 getIdentTyPointerTy(), CGM.Int32Ty, 748 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 749 llvm::FunctionType *FnTy = 750 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 751 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 752 break; 753 } 754 case OMPRTL__kmpc_end_reduce_nowait: { 755 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 756 // kmp_critical_name *lck); 757 llvm::Type *TypeParams[] = { 758 getIdentTyPointerTy(), CGM.Int32Ty, 759 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 760 llvm::FunctionType *FnTy = 761 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 762 RTLFn = 763 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 764 break; 765 } 766 case OMPRTL__kmpc_omp_task_begin_if0: { 767 
// Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 768 // *new_task); 769 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 770 CGM.VoidPtrTy}; 771 llvm::FunctionType *FnTy = 772 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 773 RTLFn = 774 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 775 break; 776 } 777 case OMPRTL__kmpc_omp_task_complete_if0: { 778 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 779 // *new_task); 780 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 781 CGM.VoidPtrTy}; 782 llvm::FunctionType *FnTy = 783 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 784 RTLFn = CGM.CreateRuntimeFunction(FnTy, 785 /*Name=*/"__kmpc_omp_task_complete_if0"); 786 break; 787 } 788 case OMPRTL__kmpc_ordered: { 789 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 790 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 791 llvm::FunctionType *FnTy = 792 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 793 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 794 break; 795 } 796 case OMPRTL__kmpc_end_ordered: { 797 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 798 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 799 llvm::FunctionType *FnTy = 800 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 801 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 802 break; 803 } 804 case OMPRTL__kmpc_omp_taskwait: { 805 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 806 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 807 llvm::FunctionType *FnTy = 808 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 809 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 810 break; 811 } 812 case OMPRTL__kmpc_taskgroup: { 813 // Build void __kmpc_taskgroup(ident_t 
*loc, kmp_int32 global_tid); 814 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 815 llvm::FunctionType *FnTy = 816 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 817 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 818 break; 819 } 820 case OMPRTL__kmpc_end_taskgroup: { 821 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 822 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 823 llvm::FunctionType *FnTy = 824 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 825 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 826 break; 827 } 828 case OMPRTL__kmpc_push_proc_bind: { 829 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 830 // int proc_bind) 831 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 832 llvm::FunctionType *FnTy = 833 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 834 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 835 break; 836 } 837 case OMPRTL__kmpc_omp_task_with_deps: { 838 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 839 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 840 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 841 llvm::Type *TypeParams[] = { 842 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 843 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 844 llvm::FunctionType *FnTy = 845 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 846 RTLFn = 847 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 848 break; 849 } 850 case OMPRTL__kmpc_omp_wait_deps: { 851 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 852 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 853 // kmp_depend_info_t *noalias_dep_list); 854 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 855 
CGM.Int32Ty, CGM.VoidPtrTy, 856 CGM.Int32Ty, CGM.VoidPtrTy}; 857 llvm::FunctionType *FnTy = 858 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 859 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 860 break; 861 } 862 case OMPRTL__kmpc_cancellationpoint: { 863 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 864 // global_tid, kmp_int32 cncl_kind) 865 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 866 llvm::FunctionType *FnTy = 867 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 868 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 869 break; 870 } 871 case OMPRTL__kmpc_cancel: { 872 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 873 // kmp_int32 cncl_kind) 874 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 875 llvm::FunctionType *FnTy = 876 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 877 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 878 break; 879 } 880 } 881 return RTLFn; 882 } 883 884 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, 885 bool IVSigned) { 886 assert((IVSize == 32 || IVSize == 64) && 887 "IV size is not compatible with the omp runtime"); 888 auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 889 : "__kmpc_for_static_init_4u") 890 : (IVSigned ? "__kmpc_for_static_init_8" 891 : "__kmpc_for_static_init_8u"); 892 auto ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 893 auto PtrTy = llvm::PointerType::getUnqual(ITy); 894 llvm::Type *TypeParams[] = { 895 getIdentTyPointerTy(), // loc 896 CGM.Int32Ty, // tid 897 CGM.Int32Ty, // schedtype 898 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 899 PtrTy, // p_lower 900 PtrTy, // p_upper 901 PtrTy, // p_stride 902 ITy, // incr 903 ITy // chunk 904 }; 905 llvm::FunctionType *FnTy = 906 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 907 return CGM.CreateRuntimeFunction(FnTy, Name); 908 } 909 910 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, 911 bool IVSigned) { 912 assert((IVSize == 32 || IVSize == 64) && 913 "IV size is not compatible with the omp runtime"); 914 auto Name = 915 IVSize == 32 916 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 917 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 918 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 919 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 920 CGM.Int32Ty, // tid 921 CGM.Int32Ty, // schedtype 922 ITy, // lower 923 ITy, // upper 924 ITy, // stride 925 ITy // chunk 926 }; 927 llvm::FunctionType *FnTy = 928 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 929 return CGM.CreateRuntimeFunction(FnTy, Name); 930 } 931 932 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, 933 bool IVSigned) { 934 assert((IVSize == 32 || IVSize == 64) && 935 "IV size is not compatible with the omp runtime"); 936 auto Name = 937 IVSize == 32 938 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 939 : (IVSigned ? 
"__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 940 llvm::Type *TypeParams[] = { 941 getIdentTyPointerTy(), // loc 942 CGM.Int32Ty, // tid 943 }; 944 llvm::FunctionType *FnTy = 945 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 946 return CGM.CreateRuntimeFunction(FnTy, Name); 947 } 948 949 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, 950 bool IVSigned) { 951 assert((IVSize == 32 || IVSize == 64) && 952 "IV size is not compatible with the omp runtime"); 953 auto Name = 954 IVSize == 32 955 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 956 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 957 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 958 auto PtrTy = llvm::PointerType::getUnqual(ITy); 959 llvm::Type *TypeParams[] = { 960 getIdentTyPointerTy(), // loc 961 CGM.Int32Ty, // tid 962 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 963 PtrTy, // p_lower 964 PtrTy, // p_upper 965 PtrTy // p_stride 966 }; 967 llvm::FunctionType *FnTy = 968 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 969 return CGM.CreateRuntimeFunction(FnTy, Name); 970 } 971 972 llvm::Constant * 973 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 974 assert(!CGM.getLangOpts().OpenMPUseTLS || 975 !CGM.getContext().getTargetInfo().isTLSSupported()); 976 // Lookup the entry, lazily creating it if necessary. 
977 return getOrCreateInternalVariable(CGM.Int8PtrPtrTy, 978 Twine(CGM.getMangledName(VD)) + ".cache."); 979 } 980 981 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 982 const VarDecl *VD, 983 Address VDAddr, 984 SourceLocation Loc) { 985 if (CGM.getLangOpts().OpenMPUseTLS && 986 CGM.getContext().getTargetInfo().isTLSSupported()) 987 return VDAddr; 988 989 auto VarTy = VDAddr.getElementType(); 990 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 991 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 992 CGM.Int8PtrTy), 993 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 994 getOrCreateThreadPrivateCache(VD)}; 995 return Address(CGF.EmitRuntimeCall( 996 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 997 VDAddr.getAlignment()); 998 } 999 1000 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1001 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1002 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1003 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1004 // library. 1005 auto OMPLoc = emitUpdateLocation(CGF, Loc); 1006 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1007 OMPLoc); 1008 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1009 // to register constructor/destructor for variable. 
1010 llvm::Value *Args[] = {OMPLoc, 1011 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1012 CGM.VoidPtrTy), 1013 Ctor, CopyCtor, Dtor}; 1014 CGF.EmitRuntimeCall( 1015 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 1016 } 1017 1018 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1019 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1020 bool PerformInit, CodeGenFunction *CGF) { 1021 if (CGM.getLangOpts().OpenMPUseTLS && 1022 CGM.getContext().getTargetInfo().isTLSSupported()) 1023 return nullptr; 1024 1025 VD = VD->getDefinition(CGM.getContext()); 1026 if (VD && ThreadPrivateWithDefinition.count(VD) == 0) { 1027 ThreadPrivateWithDefinition.insert(VD); 1028 QualType ASTTy = VD->getType(); 1029 1030 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1031 auto Init = VD->getAnyInitializer(); 1032 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1033 // Generate function that re-emits the declaration's initializer into the 1034 // threadprivate copy of the variable VD 1035 CodeGenFunction CtorCGF(CGM); 1036 FunctionArgList Args; 1037 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 1038 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 1039 Args.push_back(&Dst); 1040 1041 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( 1042 CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(), 1043 /*isVariadic=*/false); 1044 auto FTy = CGM.getTypes().GetFunctionType(FI); 1045 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 1046 FTy, ".__kmpc_global_ctor_.", Loc); 1047 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1048 Args, SourceLocation()); 1049 auto ArgVal = CtorCGF.EmitLoadOfScalar( 1050 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1051 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1052 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1053 Arg = CtorCGF.Builder.CreateElementBitCast(Arg, 1054 CtorCGF.ConvertTypeForMem(ASTTy)); 1055 
CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1056 /*IsInitializer=*/true); 1057 ArgVal = CtorCGF.EmitLoadOfScalar( 1058 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1059 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1060 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1061 CtorCGF.FinishFunction(); 1062 Ctor = Fn; 1063 } 1064 if (VD->getType().isDestructedType() != QualType::DK_none) { 1065 // Generate function that emits destructor call for the threadprivate copy 1066 // of the variable VD 1067 CodeGenFunction DtorCGF(CGM); 1068 FunctionArgList Args; 1069 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 1070 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 1071 Args.push_back(&Dst); 1072 1073 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( 1074 CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(), 1075 /*isVariadic=*/false); 1076 auto FTy = CGM.getTypes().GetFunctionType(FI); 1077 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 1078 FTy, ".__kmpc_global_dtor_.", Loc); 1079 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1080 SourceLocation()); 1081 auto ArgVal = DtorCGF.EmitLoadOfScalar( 1082 DtorCGF.GetAddrOfLocalVar(&Dst), 1083 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1084 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 1085 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1086 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1087 DtorCGF.FinishFunction(); 1088 Dtor = Fn; 1089 } 1090 // Do not emit init function if it is not required. 1091 if (!Ctor && !Dtor) 1092 return nullptr; 1093 1094 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1095 auto CopyCtorTy = 1096 llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1097 /*isVarArg=*/false)->getPointerTo(); 1098 // Copying constructor for the threadprivate variable. 
1099 // Must be NULL - reserved by runtime, but currently it requires that this 1100 // parameter is always NULL. Otherwise it fires assertion. 1101 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1102 if (Ctor == nullptr) { 1103 auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1104 /*isVarArg=*/false)->getPointerTo(); 1105 Ctor = llvm::Constant::getNullValue(CtorTy); 1106 } 1107 if (Dtor == nullptr) { 1108 auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1109 /*isVarArg=*/false)->getPointerTo(); 1110 Dtor = llvm::Constant::getNullValue(DtorTy); 1111 } 1112 if (!CGF) { 1113 auto InitFunctionTy = 1114 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1115 auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( 1116 InitFunctionTy, ".__omp_threadprivate_init_."); 1117 CodeGenFunction InitCGF(CGM); 1118 FunctionArgList ArgList; 1119 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1120 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1121 Loc); 1122 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1123 InitCGF.FinishFunction(); 1124 return InitFunction; 1125 } 1126 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1127 } 1128 return nullptr; 1129 } 1130 1131 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen 1132 /// function. Here is the logic: 1133 /// if (Cond) { 1134 /// ThenGen(); 1135 /// } else { 1136 /// ElseGen(); 1137 /// } 1138 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 1139 const RegionCodeGenTy &ThenGen, 1140 const RegionCodeGenTy &ElseGen) { 1141 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 1142 1143 // If the condition constant folds and can be elided, try to avoid emitting 1144 // the condition and the dead arm of the if/else. 
1145 bool CondConstant; 1146 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 1147 CodeGenFunction::RunCleanupsScope Scope(CGF); 1148 if (CondConstant) { 1149 ThenGen(CGF); 1150 } else { 1151 ElseGen(CGF); 1152 } 1153 return; 1154 } 1155 1156 // Otherwise, the condition did not fold, or we couldn't elide it. Just 1157 // emit the conditional branch. 1158 auto ThenBlock = CGF.createBasicBlock("omp_if.then"); 1159 auto ElseBlock = CGF.createBasicBlock("omp_if.else"); 1160 auto ContBlock = CGF.createBasicBlock("omp_if.end"); 1161 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 1162 1163 // Emit the 'then' code. 1164 CGF.EmitBlock(ThenBlock); 1165 { 1166 CodeGenFunction::RunCleanupsScope ThenScope(CGF); 1167 ThenGen(CGF); 1168 } 1169 CGF.EmitBranch(ContBlock); 1170 // Emit the 'else' code if present. 1171 { 1172 // There is no need to emit line number for unconditional branch. 1173 auto NL = ApplyDebugLocation::CreateEmpty(CGF); 1174 CGF.EmitBlock(ElseBlock); 1175 } 1176 { 1177 CodeGenFunction::RunCleanupsScope ThenScope(CGF); 1178 ElseGen(CGF); 1179 } 1180 { 1181 // There is no need to emit line number for unconditional branch. 1182 auto NL = ApplyDebugLocation::CreateEmpty(CGF); 1183 CGF.EmitBranch(ContBlock); 1184 } 1185 // Emit the continuation block for code after the if. 
1186 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 1187 } 1188 1189 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 1190 llvm::Value *OutlinedFn, 1191 ArrayRef<llvm::Value *> CapturedVars, 1192 const Expr *IfCond) { 1193 auto *RTLoc = emitUpdateLocation(CGF, Loc); 1194 auto &&ThenGen = [this, OutlinedFn, CapturedVars, 1195 RTLoc](CodeGenFunction &CGF) { 1196 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 1197 llvm::Value *Args[] = { 1198 RTLoc, 1199 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 1200 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 1201 llvm::SmallVector<llvm::Value *, 16> RealArgs; 1202 RealArgs.append(std::begin(Args), std::end(Args)); 1203 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 1204 1205 auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call); 1206 CGF.EmitRuntimeCall(RTLFn, RealArgs); 1207 }; 1208 auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc, 1209 Loc](CodeGenFunction &CGF) { 1210 auto ThreadID = getThreadID(CGF, Loc); 1211 // Build calls: 1212 // __kmpc_serialized_parallel(&Loc, GTid); 1213 llvm::Value *Args[] = {RTLoc, ThreadID}; 1214 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), 1215 Args); 1216 1217 // OutlinedFn(>id, &zero, CapturedStruct); 1218 auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc); 1219 Address ZeroAddr = 1220 CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), 1221 /*Name*/ ".zero.addr"); 1222 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 1223 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 1224 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 1225 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 1226 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 1227 CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); 1228 1229 // __kmpc_end_serialized_parallel(&Loc, GTid); 1230 llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), 
ThreadID}; 1231 CGF.EmitRuntimeCall( 1232 createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs); 1233 }; 1234 if (IfCond) { 1235 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 1236 } else { 1237 CodeGenFunction::RunCleanupsScope Scope(CGF); 1238 ThenGen(CGF); 1239 } 1240 } 1241 1242 // If we're inside an (outlined) parallel region, use the region info's 1243 // thread-ID variable (it is passed in a first argument of the outlined function 1244 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 1245 // regular serial code region, get thread ID by calling kmp_int32 1246 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 1247 // return the address of that temp. 1248 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 1249 SourceLocation Loc) { 1250 if (auto OMPRegionInfo = 1251 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 1252 if (OMPRegionInfo->getThreadIDVariable()) 1253 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 1254 1255 auto ThreadID = getThreadID(CGF, Loc); 1256 auto Int32Ty = 1257 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 1258 auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 1259 CGF.EmitStoreOfScalar(ThreadID, 1260 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 1261 1262 return ThreadIDTemp; 1263 } 1264 1265 llvm::Constant * 1266 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, 1267 const llvm::Twine &Name) { 1268 SmallString<256> Buffer; 1269 llvm::raw_svector_ostream Out(Buffer); 1270 Out << Name; 1271 auto RuntimeName = Out.str(); 1272 auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first; 1273 if (Elem.second) { 1274 assert(Elem.second->getType()->getPointerElementType() == Ty && 1275 "OMP internal variable has different type than requested"); 1276 return &*Elem.second; 1277 } 1278 1279 return Elem.second = new llvm::GlobalVariable( 1280 
CGM.getModule(), Ty, /*IsConstant*/ false, 1281 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 1282 Elem.first()); 1283 } 1284 1285 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 1286 llvm::Twine Name(".gomp_critical_user_", CriticalName); 1287 return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); 1288 } 1289 1290 namespace { 1291 template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup { 1292 llvm::Value *Callee; 1293 llvm::Value *Args[N]; 1294 1295 public: 1296 CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs) 1297 : Callee(Callee) { 1298 assert(CleanupArgs.size() == N); 1299 std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args)); 1300 } 1301 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 1302 CGF.EmitRuntimeCall(Callee, Args); 1303 } 1304 }; 1305 } // anonymous namespace 1306 1307 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 1308 StringRef CriticalName, 1309 const RegionCodeGenTy &CriticalOpGen, 1310 SourceLocation Loc) { 1311 // __kmpc_critical(ident_t *, gtid, Lock); 1312 // CriticalOpGen(); 1313 // __kmpc_end_critical(ident_t *, gtid, Lock); 1314 // Prepare arguments and build a call to __kmpc_critical 1315 { 1316 CodeGenFunction::RunCleanupsScope Scope(CGF); 1317 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1318 getCriticalRegionLock(CriticalName)}; 1319 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args); 1320 // Build a call to __kmpc_end_critical 1321 CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( 1322 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical), 1323 llvm::makeArrayRef(Args)); 1324 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 1325 } 1326 } 1327 1328 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond, 1329 OpenMPDirectiveKind Kind, SourceLocation Loc, 1330 const 
RegionCodeGenTy &BodyOpGen) { 1331 llvm::Value *CallBool = CGF.EmitScalarConversion( 1332 IfCond, 1333 CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true), 1334 CGF.getContext().BoolTy, Loc); 1335 1336 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 1337 auto *ContBlock = CGF.createBasicBlock("omp_if.end"); 1338 // Generate the branch (If-stmt) 1339 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 1340 CGF.EmitBlock(ThenBlock); 1341 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen); 1342 // Emit the rest of bblocks/branches 1343 CGF.EmitBranch(ContBlock); 1344 CGF.EmitBlock(ContBlock, true); 1345 } 1346 1347 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 1348 const RegionCodeGenTy &MasterOpGen, 1349 SourceLocation Loc) { 1350 // if(__kmpc_master(ident_t *, gtid)) { 1351 // MasterOpGen(); 1352 // __kmpc_end_master(ident_t *, gtid); 1353 // } 1354 // Prepare arguments and build a call to __kmpc_master 1355 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1356 auto *IsMaster = 1357 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args); 1358 typedef CallEndCleanup<std::extent<decltype(Args)>::value> 1359 MasterCallEndCleanup; 1360 emitIfStmt( 1361 CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void { 1362 CodeGenFunction::RunCleanupsScope Scope(CGF); 1363 CGF.EHStack.pushCleanup<MasterCallEndCleanup>( 1364 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master), 1365 llvm::makeArrayRef(Args)); 1366 MasterOpGen(CGF); 1367 }); 1368 } 1369 1370 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 1371 SourceLocation Loc) { 1372 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 1373 llvm::Value *Args[] = { 1374 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1375 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 1376 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 1377 } 
1378 1379 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 1380 const RegionCodeGenTy &TaskgroupOpGen, 1381 SourceLocation Loc) { 1382 // __kmpc_taskgroup(ident_t *, gtid); 1383 // TaskgroupOpGen(); 1384 // __kmpc_end_taskgroup(ident_t *, gtid); 1385 // Prepare arguments and build a call to __kmpc_taskgroup 1386 { 1387 CodeGenFunction::RunCleanupsScope Scope(CGF); 1388 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1389 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args); 1390 // Build a call to __kmpc_end_taskgroup 1391 CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( 1392 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 1393 llvm::makeArrayRef(Args)); 1394 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 1395 } 1396 } 1397 1398 /// Given an array of pointers to variables, project the address of a 1399 /// given variable. 1400 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, 1401 Address Array, unsigned Index, 1402 const VarDecl *Var) { 1403 // Pull out the pointer to the variable. 
1404 Address PtrAddr = 1405 CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize()); 1406 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 1407 1408 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 1409 Addr = CGF.Builder.CreateElementBitCast(Addr, 1410 CGF.ConvertTypeForMem(Var->getType())); 1411 return Addr; 1412 } 1413 1414 static llvm::Value *emitCopyprivateCopyFunction( 1415 CodeGenModule &CGM, llvm::Type *ArgsType, 1416 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 1417 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) { 1418 auto &C = CGM.getContext(); 1419 // void copy_func(void *LHSArg, void *RHSArg); 1420 FunctionArgList Args; 1421 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 1422 C.VoidPtrTy); 1423 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 1424 C.VoidPtrTy); 1425 Args.push_back(&LHSArg); 1426 Args.push_back(&RHSArg); 1427 FunctionType::ExtInfo EI; 1428 auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration( 1429 C.VoidTy, Args, EI, /*isVariadic=*/false); 1430 auto *Fn = llvm::Function::Create( 1431 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 1432 ".omp.copyprivate.copy_func", &CGM.getModule()); 1433 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn); 1434 CodeGenFunction CGF(CGM); 1435 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 1436 // Dest = (void*[n])(LHSArg); 1437 // Src = (void*[n])(RHSArg); 1438 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1439 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 1440 ArgsType), CGF.getPointerAlign()); 1441 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1442 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 1443 ArgsType), CGF.getPointerAlign()); 1444 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 1445 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 1446 // ... 
1447 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 1448 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 1449 auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 1450 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 1451 1452 auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 1453 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 1454 1455 auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 1456 QualType Type = VD->getType(); 1457 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 1458 } 1459 CGF.FinishFunction(); 1460 return Fn; 1461 } 1462 1463 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 1464 const RegionCodeGenTy &SingleOpGen, 1465 SourceLocation Loc, 1466 ArrayRef<const Expr *> CopyprivateVars, 1467 ArrayRef<const Expr *> SrcExprs, 1468 ArrayRef<const Expr *> DstExprs, 1469 ArrayRef<const Expr *> AssignmentOps) { 1470 assert(CopyprivateVars.size() == SrcExprs.size() && 1471 CopyprivateVars.size() == DstExprs.size() && 1472 CopyprivateVars.size() == AssignmentOps.size()); 1473 auto &C = CGM.getContext(); 1474 // int32 did_it = 0; 1475 // if(__kmpc_single(ident_t *, gtid)) { 1476 // SingleOpGen(); 1477 // __kmpc_end_single(ident_t *, gtid); 1478 // did_it = 1; 1479 // } 1480 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 1481 // <copy_func>, did_it); 1482 1483 Address DidIt = Address::invalid(); 1484 if (!CopyprivateVars.empty()) { 1485 // int32 did_it = 0; 1486 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1487 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 1488 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 1489 } 1490 // Prepare arguments and build a call to __kmpc_single 1491 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1492 auto *IsSingle = 1493 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args); 
1494 typedef CallEndCleanup<std::extent<decltype(Args)>::value> 1495 SingleCallEndCleanup; 1496 emitIfStmt( 1497 CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void { 1498 CodeGenFunction::RunCleanupsScope Scope(CGF); 1499 CGF.EHStack.pushCleanup<SingleCallEndCleanup>( 1500 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single), 1501 llvm::makeArrayRef(Args)); 1502 SingleOpGen(CGF); 1503 if (DidIt.isValid()) { 1504 // did_it = 1; 1505 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 1506 } 1507 }); 1508 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 1509 // <copy_func>, did_it); 1510 if (DidIt.isValid()) { 1511 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 1512 auto CopyprivateArrayTy = 1513 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 1514 /*IndexTypeQuals=*/0); 1515 // Create a list of all private variables for copyprivate. 1516 Address CopyprivateList = 1517 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 1518 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 1519 Address Elem = CGF.Builder.CreateConstArrayGEP( 1520 CopyprivateList, I, CGF.getPointerSize()); 1521 CGF.Builder.CreateStore( 1522 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1523 CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), 1524 Elem); 1525 } 1526 // Build function that copies private values from single region to all other 1527 // threads in the corresponding parallel region. 
1528 auto *CpyFn = emitCopyprivateCopyFunction( 1529 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 1530 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps); 1531 auto *BufSize = llvm::ConstantInt::get( 1532 CGM.SizeTy, C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity()); 1533 Address CL = 1534 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 1535 CGF.VoidPtrTy); 1536 auto *DidItVal = CGF.Builder.CreateLoad(DidIt); 1537 llvm::Value *Args[] = { 1538 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 1539 getThreadID(CGF, Loc), // i32 <gtid> 1540 BufSize, // size_t <buf_size> 1541 CL.getPointer(), // void *<copyprivate list> 1542 CpyFn, // void (*) (void *, void *) <copy_func> 1543 DidItVal // i32 did_it 1544 }; 1545 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 1546 } 1547 } 1548 1549 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 1550 const RegionCodeGenTy &OrderedOpGen, 1551 SourceLocation Loc) { 1552 // __kmpc_ordered(ident_t *, gtid); 1553 // OrderedOpGen(); 1554 // __kmpc_end_ordered(ident_t *, gtid); 1555 // Prepare arguments and build a call to __kmpc_ordered 1556 { 1557 CodeGenFunction::RunCleanupsScope Scope(CGF); 1558 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1559 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args); 1560 // Build a call to __kmpc_end_ordered 1561 CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( 1562 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered), 1563 llvm::makeArrayRef(Args)); 1564 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 1565 } 1566 } 1567 1568 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 1569 OpenMPDirectiveKind Kind, bool EmitChecks, 1570 bool ForceSimpleCall) { 1571 // Build call __kmpc_cancel_barrier(loc, thread_id); 1572 // Build call __kmpc_barrier(loc, thread_id); 1573 OpenMPLocationFlags Flags = 
OMP_IDENT_KMPC; 1574 if (Kind == OMPD_for) { 1575 Flags = 1576 static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR); 1577 } else if (Kind == OMPD_sections) { 1578 Flags = static_cast<OpenMPLocationFlags>(Flags | 1579 OMP_IDENT_BARRIER_IMPL_SECTIONS); 1580 } else if (Kind == OMPD_single) { 1581 Flags = 1582 static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE); 1583 } else if (Kind == OMPD_barrier) { 1584 Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL); 1585 } else { 1586 Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL); 1587 } 1588 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 1589 // thread_id); 1590 auto *OMPRegionInfo = 1591 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 1592 // Do not emit barrier call in the single directive emitted in some rare cases 1593 // for sections directives. 1594 if (OMPRegionInfo && OMPRegionInfo->getDirectiveKind() == OMPD_single) 1595 return; 1596 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 1597 getThreadID(CGF, Loc)}; 1598 if (OMPRegionInfo) { 1599 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 1600 auto *Result = CGF.EmitRuntimeCall( 1601 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 1602 if (EmitChecks) { 1603 // if (__kmpc_cancel_barrier()) { 1604 // exit from construct; 1605 // } 1606 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 1607 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 1608 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 1609 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 1610 CGF.EmitBlock(ExitBB); 1611 // exit from construct; 1612 auto CancelDestination = 1613 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 1614 CGF.EmitBranchThroughCleanup(CancelDestination); 1615 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 1616 } 1617 return; 1618 } 1619 } 1620 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 1621 
} 1622 1623 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from 1624 /// the enum sched_type in kmp.h). 1625 enum OpenMPSchedType { 1626 /// \brief Lower bound for default (unordered) versions. 1627 OMP_sch_lower = 32, 1628 OMP_sch_static_chunked = 33, 1629 OMP_sch_static = 34, 1630 OMP_sch_dynamic_chunked = 35, 1631 OMP_sch_guided_chunked = 36, 1632 OMP_sch_runtime = 37, 1633 OMP_sch_auto = 38, 1634 /// \brief Lower bound for 'ordered' versions. 1635 OMP_ord_lower = 64, 1636 OMP_ord_static_chunked = 65, 1637 OMP_ord_static = 66, 1638 OMP_ord_dynamic_chunked = 67, 1639 OMP_ord_guided_chunked = 68, 1640 OMP_ord_runtime = 69, 1641 OMP_ord_auto = 70, 1642 OMP_sch_default = OMP_sch_static, 1643 }; 1644 1645 /// \brief Map the OpenMP loop schedule to the runtime enumeration. 1646 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 1647 bool Chunked, bool Ordered) { 1648 switch (ScheduleKind) { 1649 case OMPC_SCHEDULE_static: 1650 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 1651 : (Ordered ? OMP_ord_static : OMP_sch_static); 1652 case OMPC_SCHEDULE_dynamic: 1653 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 1654 case OMPC_SCHEDULE_guided: 1655 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 1656 case OMPC_SCHEDULE_runtime: 1657 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 1658 case OMPC_SCHEDULE_auto: 1659 return Ordered ? OMP_ord_auto : OMP_sch_auto; 1660 case OMPC_SCHEDULE_unknown: 1661 assert(!Chunked && "chunk was specified but schedule kind not known"); 1662 return Ordered ? 
OMP_ord_static : OMP_sch_static; 1663 } 1664 llvm_unreachable("Unexpected runtime schedule"); 1665 } 1666 1667 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 1668 bool Chunked) const { 1669 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 1670 return Schedule == OMP_sch_static; 1671 } 1672 1673 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 1674 auto Schedule = 1675 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 1676 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 1677 return Schedule != OMP_sch_static; 1678 } 1679 1680 void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF, 1681 SourceLocation Loc, 1682 OpenMPScheduleClauseKind ScheduleKind, 1683 unsigned IVSize, bool IVSigned, 1684 bool Ordered, llvm::Value *UB, 1685 llvm::Value *Chunk) { 1686 OpenMPSchedType Schedule = 1687 getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered); 1688 assert(Ordered || 1689 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 1690 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked)); 1691 // Call __kmpc_dispatch_init( 1692 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 1693 // kmp_int[32|64] lower, kmp_int[32|64] upper, 1694 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 1695 1696 // If the Chunk was not specified in the clause - use default value 1. 
1697 if (Chunk == nullptr) 1698 Chunk = CGF.Builder.getIntN(IVSize, 1); 1699 llvm::Value *Args[] = { 1700 emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1701 getThreadID(CGF, Loc), 1702 CGF.Builder.getInt32(Schedule), // Schedule type 1703 CGF.Builder.getIntN(IVSize, 0), // Lower 1704 UB, // Upper 1705 CGF.Builder.getIntN(IVSize, 1), // Stride 1706 Chunk // Chunk 1707 }; 1708 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 1709 } 1710 1711 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 1712 SourceLocation Loc, 1713 OpenMPScheduleClauseKind ScheduleKind, 1714 unsigned IVSize, bool IVSigned, 1715 bool Ordered, Address IL, Address LB, 1716 Address UB, Address ST, 1717 llvm::Value *Chunk) { 1718 OpenMPSchedType Schedule = 1719 getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered); 1720 assert(!Ordered); 1721 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 1722 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked); 1723 1724 // Call __kmpc_for_static_init( 1725 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 1726 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 1727 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 1728 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 1729 if (Chunk == nullptr) { 1730 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) && 1731 "expected static non-chunked schedule"); 1732 // If the Chunk was not specified in the clause - use default value 1. 
1733 Chunk = CGF.Builder.getIntN(IVSize, 1); 1734 } else { 1735 assert((Schedule == OMP_sch_static_chunked || 1736 Schedule == OMP_ord_static_chunked) && 1737 "expected static chunked schedule"); 1738 } 1739 llvm::Value *Args[] = { 1740 emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1741 getThreadID(CGF, Loc), 1742 CGF.Builder.getInt32(Schedule), // Schedule type 1743 IL.getPointer(), // &isLastIter 1744 LB.getPointer(), // &LB 1745 UB.getPointer(), // &UB 1746 ST.getPointer(), // &Stride 1747 CGF.Builder.getIntN(IVSize, 1), // Incr 1748 Chunk // Chunk 1749 }; 1750 CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args); 1751 } 1752 1753 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 1754 SourceLocation Loc) { 1755 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 1756 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1757 getThreadID(CGF, Loc)}; 1758 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 1759 Args); 1760 } 1761 1762 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 1763 SourceLocation Loc, 1764 unsigned IVSize, 1765 bool IVSigned) { 1766 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 1767 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1768 getThreadID(CGF, Loc)}; 1769 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 1770 } 1771 1772 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 1773 SourceLocation Loc, unsigned IVSize, 1774 bool IVSigned, Address IL, 1775 Address LB, Address UB, 1776 Address ST) { 1777 // Call __kmpc_dispatch_next( 1778 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 1779 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 1780 // kmp_int[32|64] *p_stride); 1781 llvm::Value *Args[] = { 1782 emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc), 1783 IL.getPointer(), // &isLastIter 1784 LB.getPointer(), // &Lower 1785 
UB.getPointer(), // &Upper 1786 ST.getPointer() // &Stride 1787 }; 1788 llvm::Value *Call = 1789 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 1790 return CGF.EmitScalarConversion( 1791 Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true), 1792 CGF.getContext().BoolTy, Loc); 1793 } 1794 1795 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 1796 llvm::Value *NumThreads, 1797 SourceLocation Loc) { 1798 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 1799 llvm::Value *Args[] = { 1800 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1801 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 1802 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 1803 Args); 1804 } 1805 1806 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 1807 OpenMPProcBindClauseKind ProcBind, 1808 SourceLocation Loc) { 1809 // Constants for proc bind value accepted by the runtime. 1810 enum ProcBindTy { 1811 ProcBindFalse = 0, 1812 ProcBindTrue, 1813 ProcBindMaster, 1814 ProcBindClose, 1815 ProcBindSpread, 1816 ProcBindIntel, 1817 ProcBindDefault 1818 } RuntimeProcBind; 1819 switch (ProcBind) { 1820 case OMPC_PROC_BIND_master: 1821 RuntimeProcBind = ProcBindMaster; 1822 break; 1823 case OMPC_PROC_BIND_close: 1824 RuntimeProcBind = ProcBindClose; 1825 break; 1826 case OMPC_PROC_BIND_spread: 1827 RuntimeProcBind = ProcBindSpread; 1828 break; 1829 case OMPC_PROC_BIND_unknown: 1830 llvm_unreachable("Unsupported proc_bind value."); 1831 } 1832 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 1833 llvm::Value *Args[] = { 1834 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1835 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 1836 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 1837 } 1838 1839 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 1840 SourceLocation Loc) { 
1841 // Build call void __kmpc_flush(ident_t *loc) 1842 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 1843 emitUpdateLocation(CGF, Loc)); 1844 } 1845 1846 namespace { 1847 /// \brief Indexes of fields for type kmp_task_t. 1848 enum KmpTaskTFields { 1849 /// \brief List of shared variables. 1850 KmpTaskTShareds, 1851 /// \brief Task routine. 1852 KmpTaskTRoutine, 1853 /// \brief Partition id for the untied tasks. 1854 KmpTaskTPartId, 1855 /// \brief Function with call of destructors for private variables. 1856 KmpTaskTDestructors, 1857 }; 1858 } // anonymous namespace 1859 1860 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 1861 if (!KmpRoutineEntryPtrTy) { 1862 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 1863 auto &C = CGM.getContext(); 1864 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 1865 FunctionProtoType::ExtProtoInfo EPI; 1866 KmpRoutineEntryPtrQTy = C.getPointerType( 1867 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 1868 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 1869 } 1870 } 1871 1872 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1873 QualType FieldTy) { 1874 auto *Field = FieldDecl::Create( 1875 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1876 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1877 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1878 Field->setAccess(AS_public); 1879 DC->addDecl(Field); 1880 return Field; 1881 } 1882 1883 namespace { 1884 struct PrivateHelpersTy { 1885 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 1886 const VarDecl *PrivateElemInit) 1887 : Original(Original), PrivateCopy(PrivateCopy), 1888 PrivateElemInit(PrivateElemInit) {} 1889 const VarDecl *Original; 1890 const VarDecl *PrivateCopy; 1891 const VarDecl *PrivateElemInit; 1892 }; 1893 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> 
PrivateDataTy; 1894 } // anonymous namespace 1895 1896 static RecordDecl * 1897 createPrivatesRecordDecl(CodeGenModule &CGM, 1898 const ArrayRef<PrivateDataTy> Privates) { 1899 if (!Privates.empty()) { 1900 auto &C = CGM.getContext(); 1901 // Build struct .kmp_privates_t. { 1902 // /* private vars */ 1903 // }; 1904 auto *RD = C.buildImplicitRecord(".kmp_privates.t"); 1905 RD->startDefinition(); 1906 for (auto &&Pair : Privates) { 1907 auto *VD = Pair.second.Original; 1908 auto Type = VD->getType(); 1909 Type = Type.getNonReferenceType(); 1910 auto *FD = addFieldToRecordDecl(C, RD, Type); 1911 if (VD->hasAttrs()) { 1912 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 1913 E(VD->getAttrs().end()); 1914 I != E; ++I) 1915 FD->addAttr(*I); 1916 } 1917 } 1918 RD->completeDefinition(); 1919 return RD; 1920 } 1921 return nullptr; 1922 } 1923 1924 static RecordDecl * 1925 createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty, 1926 QualType KmpRoutineEntryPointerQTy) { 1927 auto &C = CGM.getContext(); 1928 // Build struct kmp_task_t { 1929 // void * shareds; 1930 // kmp_routine_entry_t routine; 1931 // kmp_int32 part_id; 1932 // kmp_routine_entry_t destructors; 1933 // }; 1934 auto *RD = C.buildImplicitRecord("kmp_task_t"); 1935 RD->startDefinition(); 1936 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1937 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 1938 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1939 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 1940 RD->completeDefinition(); 1941 return RD; 1942 } 1943 1944 static RecordDecl * 1945 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 1946 const ArrayRef<PrivateDataTy> Privates) { 1947 auto &C = CGM.getContext(); 1948 // Build struct kmp_task_t_with_privates { 1949 // kmp_task_t task_data; 1950 // .kmp_privates_t. 
/// \brief Emit a proxy function which accepts a pointer to the task record
/// (of type \a KmpTaskTWithPrivatesPtrQTy) as the second argument and forwards
/// to \a TaskFunction.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, <task record> *restrict tt) {
///   TaskFunction(gtid, tt->task_data.part_id, &tt->privates,
///                task_privates_map, tt->task_data.shareds);
///   return 0;
/// }
/// \endcode
/// Here task_data denotes the first field of the task record and privates the
/// second one; when the record has no second field, a null pointer is passed
/// in its place.
///
/// \param KmpInt32Ty Type of the gtid parameter and of the return value.
/// \param KmpTaskTWithPrivatesPtrQTy Pointer type of the second parameter.
/// \param KmpTaskTWithPrivatesQTy Record type the second parameter points to.
/// \param KmpTaskTQTy Record type whose part-id and shareds fields are read
/// through the first field of the task record.
/// \param SharedsPtrTy Type the loaded shareds pointer is cast to.
/// \param TaskFunction Function that is called with the unpacked arguments.
/// \param TaskPrivatesMap Value passed through unchanged as the fourth call
/// argument.
/// \return The newly created internal-linkage function.
static llvm::Value *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Value *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  auto &C = CGM.getContext();
  // Declare the two implicit parameters: (kmp_int32 gtid, <task record> *tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
                                /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict());
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  FunctionType::ExtInfo Info;
  auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
                                                    /*isVariadic=*/false);
  auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  auto *TaskEntry =
      llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_entry.", &CGM.getModule());
  CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry);
  // The body is emitted with its own CodeGenFunction, with debug info
  // disabled.
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt->task_data.shareds);
  auto *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase addresses the whole task record; Base addresses its first field,
  // through which the KmpTaskTQTy fields are accessed.
  LValue TDBase = emitLoadOfPointerLValue(
      CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
  auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Load the part id (field index KmpTaskTPartId).
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();

  // Load the shareds pointer (field index KmpTaskTShareds) and cast it to the
  // type expected by the task function.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates are the second field of the task record, if there is one;
  // pass its address as void *, or null when the field is absent.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
                             TaskPrivatesMap, SharedsParam};
  CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
  // return 0;
  CGF.EmitStoreThroughLValue(
      RValue::get(CGF.Builder.getInt32(/*C=*/0)),
      CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
*emitDestructorsFunction(CodeGenModule &CGM, 2040 SourceLocation Loc, 2041 QualType KmpInt32Ty, 2042 QualType KmpTaskTWithPrivatesPtrQTy, 2043 QualType KmpTaskTWithPrivatesQTy) { 2044 auto &C = CGM.getContext(); 2045 FunctionArgList Args; 2046 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); 2047 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, 2048 /*Id=*/nullptr, 2049 KmpTaskTWithPrivatesPtrQTy.withRestrict()); 2050 Args.push_back(&GtidArg); 2051 Args.push_back(&TaskTypeArg); 2052 FunctionType::ExtInfo Info; 2053 auto &DestructorFnInfo = 2054 CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info, 2055 /*isVariadic=*/false); 2056 auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo); 2057 auto *DestructorFn = 2058 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 2059 ".omp_task_destructor.", &CGM.getModule()); 2060 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, DestructorFnInfo, DestructorFn); 2061 CodeGenFunction CGF(CGM); 2062 CGF.disableDebugInfo(); 2063 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 2064 Args); 2065 2066 LValue Base = emitLoadOfPointerLValue( 2067 CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy); 2068 auto *KmpTaskTWithPrivatesQTyRD = 2069 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 2070 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 2071 Base = CGF.EmitLValueForField(Base, *FI); 2072 for (auto *Field : 2073 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 2074 if (auto DtorKind = Field->getType().isDestructedType()) { 2075 auto FieldLValue = CGF.EmitLValueForField(Base, Field); 2076 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 2077 } 2078 } 2079 CGF.FinishFunction(); 2080 return DestructorFn; 2081 } 2082 2083 /// \brief Emit a privates mapping function for correct handling of private and 2084 /// firstprivate variables. 
2085 /// \code 2086 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 2087 /// **noalias priv1,..., <tyn> **noalias privn) { 2088 /// *priv1 = &.privates.priv1; 2089 /// ...; 2090 /// *privn = &.privates.privn; 2091 /// } 2092 /// \endcode 2093 static llvm::Value * 2094 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 2095 const ArrayRef<const Expr *> PrivateVars, 2096 const ArrayRef<const Expr *> FirstprivateVars, 2097 QualType PrivatesQTy, 2098 const ArrayRef<PrivateDataTy> Privates) { 2099 auto &C = CGM.getContext(); 2100 FunctionArgList Args; 2101 ImplicitParamDecl TaskPrivatesArg( 2102 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 2103 C.getPointerType(PrivatesQTy).withConst().withRestrict()); 2104 Args.push_back(&TaskPrivatesArg); 2105 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 2106 unsigned Counter = 1; 2107 for (auto *E: PrivateVars) { 2108 Args.push_back(ImplicitParamDecl::Create( 2109 C, /*DC=*/nullptr, Loc, 2110 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 2111 .withConst() 2112 .withRestrict())); 2113 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2114 PrivateVarsPos[VD] = Counter; 2115 ++Counter; 2116 } 2117 for (auto *E : FirstprivateVars) { 2118 Args.push_back(ImplicitParamDecl::Create( 2119 C, /*DC=*/nullptr, Loc, 2120 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 2121 .withConst() 2122 .withRestrict())); 2123 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2124 PrivateVarsPos[VD] = Counter; 2125 ++Counter; 2126 } 2127 FunctionType::ExtInfo Info; 2128 auto &TaskPrivatesMapFnInfo = 2129 CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info, 2130 /*isVariadic=*/false); 2131 auto *TaskPrivatesMapTy = 2132 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 2133 auto *TaskPrivatesMap = llvm::Function::Create( 2134 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, 2135 ".omp_task_privates_map.", &CGM.getModule()); 2136 
CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskPrivatesMapFnInfo, 2137 TaskPrivatesMap); 2138 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 2139 CodeGenFunction CGF(CGM); 2140 CGF.disableDebugInfo(); 2141 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 2142 TaskPrivatesMapFnInfo, Args); 2143 2144 // *privi = &.privates.privi; 2145 LValue Base = emitLoadOfPointerLValue( 2146 CGF, CGF.GetAddrOfLocalVar(&TaskPrivatesArg), TaskPrivatesArg.getType()); 2147 auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 2148 Counter = 0; 2149 for (auto *Field : PrivatesQTyRD->fields()) { 2150 auto FieldLVal = CGF.EmitLValueForField(Base, Field); 2151 auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 2152 auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 2153 auto RefLoadLVal = 2154 emitLoadOfPointerLValue(CGF, RefLVal.getAddress(), RefLVal.getType()); 2155 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 2156 ++Counter; 2157 } 2158 CGF.FinishFunction(); 2159 return TaskPrivatesMap; 2160 } 2161 2162 static llvm::Value *getTypeSize(CodeGenFunction &CGF, QualType Ty) { 2163 auto &C = CGF.getContext(); 2164 llvm::Value *Size; 2165 auto SizeInChars = C.getTypeSizeInChars(Ty); 2166 if (SizeInChars.isZero()) { 2167 // getTypeSizeInChars() returns 0 for a VLA. 2168 Size = nullptr; 2169 while (auto *VAT = C.getAsVariableArrayType(Ty)) { 2170 llvm::Value *ArraySize; 2171 std::tie(ArraySize, Ty) = CGF.getVLASize(VAT); 2172 Size = Size ? 
CGF.Builder.CreateNUWMul(Size, ArraySize) : ArraySize; 2173 } 2174 SizeInChars = C.getTypeSizeInChars(Ty); 2175 assert(!SizeInChars.isZero()); 2176 Size = CGF.Builder.CreateNUWMul( 2177 Size, llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity())); 2178 } else 2179 Size = llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity()); 2180 return Size; 2181 } 2182 2183 static int array_pod_sort_comparator(const PrivateDataTy *P1, 2184 const PrivateDataTy *P2) { 2185 return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0); 2186 } 2187 2188 void CGOpenMPRuntime::emitTaskCall( 2189 CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, 2190 bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final, 2191 llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, 2192 const Expr *IfCond, ArrayRef<const Expr *> PrivateVars, 2193 ArrayRef<const Expr *> PrivateCopies, 2194 ArrayRef<const Expr *> FirstprivateVars, 2195 ArrayRef<const Expr *> FirstprivateCopies, 2196 ArrayRef<const Expr *> FirstprivateInits, 2197 ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) { 2198 auto &C = CGM.getContext(); 2199 llvm::SmallVector<PrivateDataTy, 8> Privates; 2200 // Aggregate privates and sort them by the alignment. 
2201 auto I = PrivateCopies.begin(); 2202 for (auto *E : PrivateVars) { 2203 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2204 Privates.push_back(std::make_pair( 2205 C.getDeclAlign(VD), 2206 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 2207 /*PrivateElemInit=*/nullptr))); 2208 ++I; 2209 } 2210 I = FirstprivateCopies.begin(); 2211 auto IElemInitRef = FirstprivateInits.begin(); 2212 for (auto *E : FirstprivateVars) { 2213 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2214 Privates.push_back(std::make_pair( 2215 C.getDeclAlign(VD), 2216 PrivateHelpersTy( 2217 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 2218 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())))); 2219 ++I, ++IElemInitRef; 2220 } 2221 llvm::array_pod_sort(Privates.begin(), Privates.end(), 2222 array_pod_sort_comparator); 2223 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2224 // Build type kmp_routine_entry_t (if not built yet). 2225 emitKmpRoutineEntryT(KmpInt32Ty); 2226 // Build type kmp_task_t (if not built yet). 2227 if (KmpTaskTQTy.isNull()) { 2228 KmpTaskTQTy = C.getRecordType( 2229 createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy)); 2230 } 2231 auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 2232 // Build particular struct kmp_task_t for the given task. 
2233 auto *KmpTaskTWithPrivatesQTyRD = 2234 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 2235 auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 2236 QualType KmpTaskTWithPrivatesPtrQTy = 2237 C.getPointerType(KmpTaskTWithPrivatesQTy); 2238 auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 2239 auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo(); 2240 auto KmpTaskTWithPrivatesTySize = 2241 CGM.getSize(C.getTypeSizeInChars(KmpTaskTWithPrivatesQTy)); 2242 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 2243 2244 // Emit initial values for private copies (if any). 2245 llvm::Value *TaskPrivatesMap = nullptr; 2246 auto *TaskPrivatesMapTy = 2247 std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(), 2248 3) 2249 ->getType(); 2250 if (!Privates.empty()) { 2251 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 2252 TaskPrivatesMap = emitTaskPrivateMappingFunction( 2253 CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates); 2254 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2255 TaskPrivatesMap, TaskPrivatesMapTy); 2256 } else { 2257 TaskPrivatesMap = llvm::ConstantPointerNull::get( 2258 cast<llvm::PointerType>(TaskPrivatesMapTy)); 2259 } 2260 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 2261 // kmp_task_t *tt); 2262 auto *TaskEntry = emitProxyTaskFunction( 2263 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy, 2264 KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap); 2265 2266 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 2267 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2268 // kmp_routine_entry_t *task_entry); 2269 // Task flags. Format is taken from 2270 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h, 2271 // description of kmp_tasking_flags struct. 
2272 const unsigned TiedFlag = 0x1; 2273 const unsigned FinalFlag = 0x2; 2274 unsigned Flags = Tied ? TiedFlag : 0; 2275 auto *TaskFlags = 2276 Final.getPointer() 2277 ? CGF.Builder.CreateSelect(Final.getPointer(), 2278 CGF.Builder.getInt32(FinalFlag), 2279 CGF.Builder.getInt32(/*C=*/0)) 2280 : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0); 2281 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 2282 auto SharedsSize = C.getTypeSizeInChars(SharedsTy); 2283 llvm::Value *AllocArgs[] = { 2284 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags, 2285 KmpTaskTWithPrivatesTySize, CGM.getSize(SharedsSize), 2286 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry, 2287 KmpRoutineEntryPtrTy)}; 2288 auto *NewTask = CGF.EmitRuntimeCall( 2289 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 2290 auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2291 NewTask, KmpTaskTWithPrivatesPtrTy); 2292 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 2293 KmpTaskTWithPrivatesQTy); 2294 LValue TDBase = 2295 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 2296 // Fill the data in the resulting kmp_task_t record. 2297 // Copy shareds if there are any. 2298 Address KmpTaskSharedsPtr = Address::invalid(); 2299 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 2300 KmpTaskSharedsPtr = 2301 Address(CGF.EmitLoadOfScalar( 2302 CGF.EmitLValueForField( 2303 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 2304 KmpTaskTShareds)), 2305 Loc), 2306 CGF.getNaturalTypeAlignment(SharedsTy)); 2307 CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy); 2308 } 2309 // Emit initial values for private copies (if any). 
2310 bool NeedsCleanup = false; 2311 if (!Privates.empty()) { 2312 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 2313 auto PrivatesBase = CGF.EmitLValueForField(Base, *FI); 2314 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 2315 LValue SharedsBase; 2316 if (!FirstprivateVars.empty()) { 2317 SharedsBase = CGF.MakeAddrLValue( 2318 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2319 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 2320 SharedsTy); 2321 } 2322 CodeGenFunction::CGCapturedStmtInfo CapturesInfo( 2323 cast<CapturedStmt>(*D.getAssociatedStmt())); 2324 for (auto &&Pair : Privates) { 2325 auto *VD = Pair.second.PrivateCopy; 2326 auto *Init = VD->getAnyInitializer(); 2327 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 2328 if (Init) { 2329 if (auto *Elem = Pair.second.PrivateElemInit) { 2330 auto *OriginalVD = Pair.second.Original; 2331 auto *SharedField = CapturesInfo.lookup(OriginalVD); 2332 auto SharedRefLValue = 2333 CGF.EmitLValueForField(SharedsBase, SharedField); 2334 SharedRefLValue = CGF.MakeAddrLValue( 2335 Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), 2336 SharedRefLValue.getType(), AlignmentSource::Decl); 2337 QualType Type = OriginalVD->getType(); 2338 if (Type->isArrayType()) { 2339 // Initialize firstprivate array. 2340 if (!isa<CXXConstructExpr>(Init) || 2341 CGF.isTrivialInitializer(Init)) { 2342 // Perform simple memcpy. 2343 CGF.EmitAggregateAssign(PrivateLValue.getAddress(), 2344 SharedRefLValue.getAddress(), Type); 2345 } else { 2346 // Initialize firstprivate array using element-by-element 2347 // intialization. 2348 CGF.EmitOMPAggregateAssign( 2349 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), 2350 Type, [&CGF, Elem, Init, &CapturesInfo]( 2351 Address DestElement, Address SrcElement) { 2352 // Clean up any temporaries needed by the initialization. 
2353 CodeGenFunction::OMPPrivateScope InitScope(CGF); 2354 InitScope.addPrivate(Elem, [SrcElement]() -> Address { 2355 return SrcElement; 2356 }); 2357 (void)InitScope.Privatize(); 2358 // Emit initialization for single element. 2359 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 2360 CGF, &CapturesInfo); 2361 CGF.EmitAnyExprToMem(Init, DestElement, 2362 Init->getType().getQualifiers(), 2363 /*IsInitializer=*/false); 2364 }); 2365 } 2366 } else { 2367 CodeGenFunction::OMPPrivateScope InitScope(CGF); 2368 InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { 2369 return SharedRefLValue.getAddress(); 2370 }); 2371 (void)InitScope.Privatize(); 2372 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 2373 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 2374 /*capturedByInit=*/false); 2375 } 2376 } else { 2377 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 2378 } 2379 } 2380 NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType(); 2381 ++FI; 2382 } 2383 } 2384 // Provide pointer to function with destructors for privates. 2385 llvm::Value *DestructorFn = 2386 NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty, 2387 KmpTaskTWithPrivatesPtrQTy, 2388 KmpTaskTWithPrivatesQTy) 2389 : llvm::ConstantPointerNull::get( 2390 cast<llvm::PointerType>(KmpRoutineEntryPtrTy)); 2391 LValue Destructor = CGF.EmitLValueForField( 2392 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors)); 2393 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2394 DestructorFn, KmpRoutineEntryPtrTy), 2395 Destructor); 2396 2397 // Process list of dependences. 2398 Address DependenciesArray = Address::invalid(); 2399 unsigned NumDependencies = Dependences.size(); 2400 if (NumDependencies) { 2401 // Dependence kind for RTL. 
2402 enum RTLDependenceKindTy { DepIn = 1, DepOut = 2, DepInOut = 3 }; 2403 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 2404 RecordDecl *KmpDependInfoRD; 2405 QualType FlagsTy = C.getIntTypeForBitwidth( 2406 C.toBits(C.getTypeSizeInChars(C.BoolTy)), /*Signed=*/false); 2407 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 2408 if (KmpDependInfoTy.isNull()) { 2409 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 2410 KmpDependInfoRD->startDefinition(); 2411 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 2412 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 2413 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 2414 KmpDependInfoRD->completeDefinition(); 2415 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 2416 } else { 2417 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 2418 } 2419 CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy); 2420 // Define type kmp_depend_info[<Dependences.size()>]; 2421 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 2422 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), 2423 ArrayType::Normal, /*IndexTypeQuals=*/0); 2424 // kmp_depend_info[<Dependences.size()>] deps; 2425 DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy); 2426 for (unsigned i = 0; i < NumDependencies; ++i) { 2427 const Expr *E = Dependences[i].second; 2428 auto Addr = CGF.EmitLValue(E); 2429 llvm::Value *Size; 2430 QualType Ty = E->getType(); 2431 if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 2432 LValue UpAddrLVal = 2433 CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false); 2434 llvm::Value *UpAddr = 2435 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); 2436 llvm::Value *LowIntPtr = 2437 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); 2438 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 2439 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 2440 } else { 2441 
Size = getTypeSize(CGF, Ty); 2442 } 2443 auto Base = CGF.MakeAddrLValue( 2444 CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize), 2445 KmpDependInfoTy); 2446 // deps[i].base_addr = &<Dependences[i].second>; 2447 auto BaseAddrLVal = CGF.EmitLValueForField( 2448 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 2449 CGF.EmitStoreOfScalar( 2450 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), 2451 BaseAddrLVal); 2452 // deps[i].len = sizeof(<Dependences[i].second>); 2453 auto LenLVal = CGF.EmitLValueForField( 2454 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 2455 CGF.EmitStoreOfScalar(Size, LenLVal); 2456 // deps[i].flags = <Dependences[i].first>; 2457 RTLDependenceKindTy DepKind; 2458 switch (Dependences[i].first) { 2459 case OMPC_DEPEND_in: 2460 DepKind = DepIn; 2461 break; 2462 case OMPC_DEPEND_out: 2463 DepKind = DepOut; 2464 break; 2465 case OMPC_DEPEND_inout: 2466 DepKind = DepInOut; 2467 break; 2468 case OMPC_DEPEND_unknown: 2469 llvm_unreachable("Unknown task dependence type"); 2470 } 2471 auto FlagsLVal = CGF.EmitLValueForField( 2472 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 2473 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 2474 FlagsLVal); 2475 } 2476 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2477 CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()), 2478 CGF.VoidPtrTy); 2479 } 2480 2481 // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() 2482 // libcall. 
2483 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2484 // *new_task); 2485 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2486 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2487 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 2488 // list is not empty 2489 auto *ThreadID = getThreadID(CGF, Loc); 2490 auto *UpLoc = emitUpdateLocation(CGF, Loc); 2491 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 2492 llvm::Value *DepTaskArgs[7]; 2493 if (NumDependencies) { 2494 DepTaskArgs[0] = UpLoc; 2495 DepTaskArgs[1] = ThreadID; 2496 DepTaskArgs[2] = NewTask; 2497 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); 2498 DepTaskArgs[4] = DependenciesArray.getPointer(); 2499 DepTaskArgs[5] = CGF.Builder.getInt32(0); 2500 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 2501 } 2502 auto &&ThenCodeGen = [this, NumDependencies, 2503 &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) { 2504 // TODO: add check for untied tasks. 
2505 if (NumDependencies) { 2506 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), 2507 DepTaskArgs); 2508 } else { 2509 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 2510 TaskArgs); 2511 } 2512 }; 2513 typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value> 2514 IfCallEndCleanup; 2515 2516 llvm::Value *DepWaitTaskArgs[6]; 2517 if (NumDependencies) { 2518 DepWaitTaskArgs[0] = UpLoc; 2519 DepWaitTaskArgs[1] = ThreadID; 2520 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); 2521 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 2522 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 2523 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 2524 } 2525 auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 2526 NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) { 2527 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 2528 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2529 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 2530 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 2531 // is specified. 
2532 if (NumDependencies) 2533 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), 2534 DepWaitTaskArgs); 2535 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 2536 // kmp_task_t *new_task); 2537 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), 2538 TaskArgs); 2539 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 2540 // kmp_task_t *new_task); 2541 CGF.EHStack.pushCleanup<IfCallEndCleanup>( 2542 NormalAndEHCleanup, 2543 createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), 2544 llvm::makeArrayRef(TaskArgs)); 2545 2546 // Call proxy_task_entry(gtid, new_task); 2547 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 2548 CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); 2549 }; 2550 2551 if (IfCond) { 2552 emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 2553 } else { 2554 CodeGenFunction::RunCleanupsScope Scope(CGF); 2555 ThenCodeGen(CGF); 2556 } 2557 } 2558 2559 static llvm::Value *emitReductionFunction(CodeGenModule &CGM, 2560 llvm::Type *ArgsType, 2561 ArrayRef<const Expr *> LHSExprs, 2562 ArrayRef<const Expr *> RHSExprs, 2563 ArrayRef<const Expr *> ReductionOps) { 2564 auto &C = CGM.getContext(); 2565 2566 // void reduction_func(void *LHSArg, void *RHSArg); 2567 FunctionArgList Args; 2568 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 2569 C.VoidPtrTy); 2570 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 2571 C.VoidPtrTy); 2572 Args.push_back(&LHSArg); 2573 Args.push_back(&RHSArg); 2574 FunctionType::ExtInfo EI; 2575 auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration( 2576 C.VoidTy, Args, EI, /*isVariadic=*/false); 2577 auto *Fn = llvm::Function::Create( 2578 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 2579 ".omp.reduction.reduction_func", &CGM.getModule()); 2580 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn); 2581 CodeGenFunction CGF(CGM); 2582 
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 2583 2584 // Dst = (void*[n])(LHSArg); 2585 // Src = (void*[n])(RHSArg); 2586 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2587 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2588 ArgsType), CGF.getPointerAlign()); 2589 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2590 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2591 ArgsType), CGF.getPointerAlign()); 2592 2593 // ... 2594 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 2595 // ... 2596 CodeGenFunction::OMPPrivateScope Scope(CGF); 2597 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I) { 2598 auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 2599 Scope.addPrivate(RHSVar, [&]() -> Address { 2600 return emitAddrOfVarFromArray(CGF, RHS, I, RHSVar); 2601 }); 2602 auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 2603 Scope.addPrivate(LHSVar, [&]() -> Address { 2604 return emitAddrOfVarFromArray(CGF, LHS, I, LHSVar); 2605 }); 2606 } 2607 Scope.Privatize(); 2608 for (auto *E : ReductionOps) { 2609 CGF.EmitIgnoredExpr(E); 2610 } 2611 Scope.ForceCleanup(); 2612 CGF.FinishFunction(); 2613 return Fn; 2614 } 2615 2616 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 2617 ArrayRef<const Expr *> LHSExprs, 2618 ArrayRef<const Expr *> RHSExprs, 2619 ArrayRef<const Expr *> ReductionOps, 2620 bool WithNowait, bool SimpleReduction) { 2621 // Next code should be emitted for reduction: 2622 // 2623 // static kmp_critical_name lock = { 0 }; 2624 // 2625 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 2626 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 2627 // ... 2628 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 2629 // *(Type<n>-1*)rhs[<n>-1]); 2630 // } 2631 // 2632 // ... 
2633 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 2634 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 2635 // RedList, reduce_func, &<lock>)) { 2636 // case 1: 2637 // ... 2638 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 2639 // ... 2640 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 2641 // break; 2642 // case 2: 2643 // ... 2644 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 2645 // ... 2646 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 2647 // break; 2648 // default:; 2649 // } 2650 // 2651 // if SimpleReduction is true, only the next code is generated: 2652 // ... 2653 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 2654 // ... 2655 2656 auto &C = CGM.getContext(); 2657 2658 if (SimpleReduction) { 2659 CodeGenFunction::RunCleanupsScope Scope(CGF); 2660 for (auto *E : ReductionOps) { 2661 CGF.EmitIgnoredExpr(E); 2662 } 2663 return; 2664 } 2665 2666 // 1. Build a list of reduction variables. 2667 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 2668 llvm::APInt ArraySize(/*unsigned int numBits=*/32, RHSExprs.size()); 2669 QualType ReductionArrayTy = 2670 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 2671 /*IndexTypeQuals=*/0); 2672 Address ReductionList = 2673 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 2674 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I) { 2675 Address Elem = 2676 CGF.Builder.CreateConstArrayGEP(ReductionList, I, CGF.getPointerSize()); 2677 CGF.Builder.CreateStore( 2678 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2679 CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), 2680 Elem); 2681 } 2682 2683 // 2. Emit reduce_func(). 2684 auto *ReductionFn = emitReductionFunction( 2685 CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), LHSExprs, 2686 RHSExprs, ReductionOps); 2687 2688 // 3. 
Create static kmp_critical_name lock = { 0 }; 2689 auto *Lock = getCriticalRegionLock(".reduction"); 2690 2691 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 2692 // RedList, reduce_func, &<lock>); 2693 auto *IdentTLoc = emitUpdateLocation( 2694 CGF, Loc, 2695 static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE)); 2696 auto *ThreadId = getThreadID(CGF, Loc); 2697 auto *ReductionArrayTySize = llvm::ConstantInt::get( 2698 CGM.SizeTy, C.getTypeSizeInChars(ReductionArrayTy).getQuantity()); 2699 auto *RL = 2700 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(), 2701 CGF.VoidPtrTy); 2702 llvm::Value *Args[] = { 2703 IdentTLoc, // ident_t *<loc> 2704 ThreadId, // i32 <gtid> 2705 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 2706 ReductionArrayTySize, // size_type sizeof(RedList) 2707 RL, // void *RedList 2708 ReductionFn, // void (*) (void *, void *) <reduce_func> 2709 Lock // kmp_critical_name *&<lock> 2710 }; 2711 auto Res = CGF.EmitRuntimeCall( 2712 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 2713 : OMPRTL__kmpc_reduce), 2714 Args); 2715 2716 // 5. Build switch(res) 2717 auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 2718 auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 2719 2720 // 6. Build case 1: 2721 // ... 2722 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 2723 // ... 
2724 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 2725 // break; 2726 auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 2727 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 2728 CGF.EmitBlock(Case1BB); 2729 2730 { 2731 CodeGenFunction::RunCleanupsScope Scope(CGF); 2732 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 2733 llvm::Value *EndArgs[] = { 2734 IdentTLoc, // ident_t *<loc> 2735 ThreadId, // i32 <gtid> 2736 Lock // kmp_critical_name *&<lock> 2737 }; 2738 CGF.EHStack 2739 .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>( 2740 NormalAndEHCleanup, 2741 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait 2742 : OMPRTL__kmpc_end_reduce), 2743 llvm::makeArrayRef(EndArgs)); 2744 for (auto *E : ReductionOps) { 2745 CGF.EmitIgnoredExpr(E); 2746 } 2747 } 2748 2749 CGF.EmitBranch(DefaultBB); 2750 2751 // 7. Build case 2: 2752 // ... 2753 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 2754 // ... 
2755 // break; 2756 auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 2757 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 2758 CGF.EmitBlock(Case2BB); 2759 2760 { 2761 CodeGenFunction::RunCleanupsScope Scope(CGF); 2762 if (!WithNowait) { 2763 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 2764 llvm::Value *EndArgs[] = { 2765 IdentTLoc, // ident_t *<loc> 2766 ThreadId, // i32 <gtid> 2767 Lock // kmp_critical_name *&<lock> 2768 }; 2769 CGF.EHStack 2770 .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>( 2771 NormalAndEHCleanup, 2772 createRuntimeFunction(OMPRTL__kmpc_end_reduce), 2773 llvm::makeArrayRef(EndArgs)); 2774 } 2775 auto I = LHSExprs.begin(); 2776 for (auto *E : ReductionOps) { 2777 const Expr *XExpr = nullptr; 2778 const Expr *EExpr = nullptr; 2779 const Expr *UpExpr = nullptr; 2780 BinaryOperatorKind BO = BO_Comma; 2781 if (auto *BO = dyn_cast<BinaryOperator>(E)) { 2782 if (BO->getOpcode() == BO_Assign) { 2783 XExpr = BO->getLHS(); 2784 UpExpr = BO->getRHS(); 2785 } 2786 } 2787 // Try to emit update expression as a simple atomic. 2788 auto *RHSExpr = UpExpr; 2789 if (RHSExpr) { 2790 // Analyze RHS part of the whole expression. 2791 if (auto *ACO = dyn_cast<AbstractConditionalOperator>( 2792 RHSExpr->IgnoreParenImpCasts())) { 2793 // If this is a conditional operator, analyze its condition for 2794 // min/max reduction operator. 
2795 RHSExpr = ACO->getCond(); 2796 } 2797 if (auto *BORHS = 2798 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 2799 EExpr = BORHS->getRHS(); 2800 BO = BORHS->getOpcode(); 2801 } 2802 } 2803 if (XExpr) { 2804 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); 2805 LValue X = CGF.EmitLValue(XExpr); 2806 RValue E; 2807 if (EExpr) 2808 E = CGF.EmitAnyExpr(EExpr); 2809 CGF.EmitOMPAtomicSimpleUpdateExpr( 2810 X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc, 2811 [&CGF, UpExpr, VD](RValue XRValue) { 2812 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 2813 PrivateScope.addPrivate( 2814 VD, [&CGF, VD, XRValue]() -> Address { 2815 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 2816 CGF.EmitStoreThroughLValue( 2817 XRValue, 2818 CGF.MakeAddrLValue(LHSTemp, VD->getType())); 2819 return LHSTemp; 2820 }); 2821 (void)PrivateScope.Privatize(); 2822 return CGF.EmitAnyExpr(UpExpr); 2823 }); 2824 } else { 2825 // Emit as a critical region. 2826 emitCriticalRegion(CGF, ".atomic_reduction", [E](CodeGenFunction &CGF) { 2827 CGF.EmitIgnoredExpr(E); 2828 }, Loc); 2829 } 2830 ++I; 2831 } 2832 } 2833 2834 CGF.EmitBranch(DefaultBB); 2835 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 2836 } 2837 2838 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 2839 SourceLocation Loc) { 2840 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 2841 // global_tid); 2842 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2843 // Ignore return result until untied tasks are supported. 
2844 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 2845 } 2846 2847 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 2848 OpenMPDirectiveKind InnerKind, 2849 const RegionCodeGenTy &CodeGen, 2850 bool HasCancel) { 2851 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 2852 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 2853 } 2854 2855 namespace { 2856 enum RTCancelKind { 2857 CancelNoreq = 0, 2858 CancelParallel = 1, 2859 CancelLoop = 2, 2860 CancelSections = 3, 2861 CancelTaskgroup = 4 2862 }; 2863 } 2864 2865 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 2866 RTCancelKind CancelKind = CancelNoreq; 2867 if (CancelRegion == OMPD_parallel) 2868 CancelKind = CancelParallel; 2869 else if (CancelRegion == OMPD_for) 2870 CancelKind = CancelLoop; 2871 else if (CancelRegion == OMPD_sections) 2872 CancelKind = CancelSections; 2873 else { 2874 assert(CancelRegion == OMPD_taskgroup); 2875 CancelKind = CancelTaskgroup; 2876 } 2877 return CancelKind; 2878 } 2879 2880 void CGOpenMPRuntime::emitCancellationPointCall( 2881 CodeGenFunction &CGF, SourceLocation Loc, 2882 OpenMPDirectiveKind CancelRegion) { 2883 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2884 // global_tid, kmp_int32 cncl_kind); 2885 if (auto *OMPRegionInfo = 2886 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 2887 if (OMPRegionInfo->getDirectiveKind() == OMPD_single) 2888 return; 2889 if (OMPRegionInfo->hasCancel()) { 2890 llvm::Value *Args[] = { 2891 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2892 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 2893 // Ignore return result until untied tasks are supported. 
2894 auto *Result = CGF.EmitRuntimeCall( 2895 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 2896 // if (__kmpc_cancellationpoint()) { 2897 // __kmpc_cancel_barrier(); 2898 // exit from construct; 2899 // } 2900 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2901 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 2902 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 2903 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2904 CGF.EmitBlock(ExitBB); 2905 // __kmpc_cancel_barrier(); 2906 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 2907 // exit from construct; 2908 auto CancelDest = 2909 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2910 CGF.EmitBranchThroughCleanup(CancelDest); 2911 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2912 } 2913 } 2914 } 2915 2916 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 2917 const Expr *IfCond, 2918 OpenMPDirectiveKind CancelRegion) { 2919 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2920 // kmp_int32 cncl_kind); 2921 if (auto *OMPRegionInfo = 2922 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 2923 if (OMPRegionInfo->getDirectiveKind() == OMPD_single) 2924 return; 2925 auto &&ThenGen = [this, Loc, CancelRegion, 2926 OMPRegionInfo](CodeGenFunction &CGF) { 2927 llvm::Value *Args[] = { 2928 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2929 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 2930 // Ignore return result until untied tasks are supported. 
2931 auto *Result = 2932 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 2933 // if (__kmpc_cancel()) { 2934 // __kmpc_cancel_barrier(); 2935 // exit from construct; 2936 // } 2937 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2938 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 2939 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 2940 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2941 CGF.EmitBlock(ExitBB); 2942 // __kmpc_cancel_barrier(); 2943 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 2944 // exit from construct; 2945 auto CancelDest = 2946 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2947 CGF.EmitBranchThroughCleanup(CancelDest); 2948 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2949 }; 2950 if (IfCond) 2951 emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &) {}); 2952 else 2953 ThenGen(CGF); 2954 } 2955 } 2956