1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This provides a class for OpenMP runtime code generation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGOpenMPRuntime.h" 15 #include "CodeGenFunction.h" 16 #include "CGCleanup.h" 17 #include "clang/AST/Decl.h" 18 #include "clang/AST/StmtOpenMP.h" 19 #include "llvm/ADT/ArrayRef.h" 20 #include "llvm/IR/CallSite.h" 21 #include "llvm/IR/DerivedTypes.h" 22 #include "llvm/IR/GlobalValue.h" 23 #include "llvm/IR/Value.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include <cassert> 26 27 using namespace clang; 28 using namespace CodeGen; 29 30 namespace { 31 /// \brief Base class for handling code generation inside OpenMP regions. 32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 33 public: 34 /// \brief Kinds of OpenMP regions used in codegen. 35 enum CGOpenMPRegionKind { 36 /// \brief Region with outlined function for standalone 'parallel' 37 /// directive. 38 ParallelOutlinedRegion, 39 /// \brief Region with outlined function for standalone 'task' directive. 40 TaskOutlinedRegion, 41 /// \brief Region for constructs that do not require function outlining, 42 /// like 'for', 'sections', 'atomic' etc. directives. 43 InlinedRegion, 44 }; 45 46 CGOpenMPRegionInfo(const CapturedStmt &CS, 47 const CGOpenMPRegionKind RegionKind, 48 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind) 49 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 50 CodeGen(CodeGen), Kind(Kind) {} 51 52 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 53 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind) 54 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 55 Kind(Kind) {} 56 57 /// \brief Get a variable or parameter for storing global thread id 58 /// inside OpenMP construct. 59 virtual const VarDecl *getThreadIDVariable() const = 0; 60 61 /// \brief Emit the captured statement body. 62 virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 63 64 /// \brief Get an LValue for the current ThreadID variable. 65 /// \return LValue for thread id variable. This LValue always has type int32*. 66 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 67 68 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 69 70 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 71 72 static bool classof(const CGCapturedStmtInfo *Info) { 73 return Info->getKind() == CR_OpenMP; 74 } 75 76 protected: 77 CGOpenMPRegionKind RegionKind; 78 const RegionCodeGenTy &CodeGen; 79 OpenMPDirectiveKind Kind; 80 }; 81 82 /// \brief API for captured statement code generation in OpenMP constructs. 83 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo { 84 public: 85 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 86 const RegionCodeGenTy &CodeGen, 87 OpenMPDirectiveKind Kind) 88 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind), 89 ThreadIDVar(ThreadIDVar) { 90 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 91 } 92 /// \brief Get a variable or parameter for storing global thread id 93 /// inside OpenMP construct. 94 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 95 96 /// \brief Get the name of the capture helper. 97 StringRef getHelperName() const override { return ".omp_outlined."; } 98 99 static bool classof(const CGCapturedStmtInfo *Info) { 100 return CGOpenMPRegionInfo::classof(Info) && 101 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 102 ParallelOutlinedRegion; 103 } 104 105 private: 106 /// \brief A variable or parameter storing global thread id for OpenMP 107 /// constructs. 108 const VarDecl *ThreadIDVar; 109 }; 110 111 /// \brief API for captured statement code generation in OpenMP constructs. 112 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo { 113 public: 114 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 115 const VarDecl *ThreadIDVar, 116 const RegionCodeGenTy &CodeGen, 117 OpenMPDirectiveKind Kind) 118 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind), 119 ThreadIDVar(ThreadIDVar) { 120 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 121 } 122 /// \brief Get a variable or parameter for storing global thread id 123 /// inside OpenMP construct. 124 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 125 126 /// \brief Get an LValue for the current ThreadID variable. 127 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 128 129 /// \brief Get the name of the capture helper. 130 StringRef getHelperName() const override { return ".omp_outlined."; } 131 132 static bool classof(const CGCapturedStmtInfo *Info) { 133 return CGOpenMPRegionInfo::classof(Info) && 134 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 135 TaskOutlinedRegion; 136 } 137 138 private: 139 /// \brief A variable or parameter storing global thread id for OpenMP 140 /// constructs. 141 const VarDecl *ThreadIDVar; 142 }; 143 144 /// \brief API for inlined captured statement code generation in OpenMP 145 /// constructs. 146 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 147 public: 148 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 149 const RegionCodeGenTy &CodeGen, 150 OpenMPDirectiveKind Kind) 151 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind), OldCSI(OldCSI), 152 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 153 // \brief Retrieve the value of the context parameter. 154 llvm::Value *getContextValue() const override { 155 if (OuterRegionInfo) 156 return OuterRegionInfo->getContextValue(); 157 llvm_unreachable("No context value for inlined OpenMP region"); 158 } 159 virtual void setContextValue(llvm::Value *V) override { 160 if (OuterRegionInfo) { 161 OuterRegionInfo->setContextValue(V); 162 return; 163 } 164 llvm_unreachable("No context value for inlined OpenMP region"); 165 } 166 /// \brief Lookup the captured field decl for a variable. 167 const FieldDecl *lookup(const VarDecl *VD) const override { 168 if (OuterRegionInfo) 169 return OuterRegionInfo->lookup(VD); 170 // If there is no outer outlined region,no need to lookup in a list of 171 // captured variables, we can use the original one. 172 return nullptr; 173 } 174 FieldDecl *getThisFieldDecl() const override { 175 if (OuterRegionInfo) 176 return OuterRegionInfo->getThisFieldDecl(); 177 return nullptr; 178 } 179 /// \brief Get a variable or parameter for storing global thread id 180 /// inside OpenMP construct. 181 const VarDecl *getThreadIDVariable() const override { 182 if (OuterRegionInfo) 183 return OuterRegionInfo->getThreadIDVariable(); 184 return nullptr; 185 } 186 187 /// \brief Get the name of the capture helper. 188 StringRef getHelperName() const override { 189 if (auto *OuterRegionInfo = getOldCSI()) 190 return OuterRegionInfo->getHelperName(); 191 llvm_unreachable("No helper name for inlined OpenMP construct"); 192 } 193 194 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 195 196 static bool classof(const CGCapturedStmtInfo *Info) { 197 return CGOpenMPRegionInfo::classof(Info) && 198 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 199 } 200 201 private: 202 /// \brief CodeGen info about outer OpenMP region. 203 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 204 CGOpenMPRegionInfo *OuterRegionInfo; 205 }; 206 207 /// \brief RAII for emitting code of OpenMP constructs. 208 class InlinedOpenMPRegionRAII { 209 CodeGenFunction &CGF; 210 211 public: 212 /// \brief Constructs region for combined constructs. 213 /// \param CodeGen Code generation sequence for combined directives. Includes 214 /// a list of functions used for code generation of implicitly inlined 215 /// regions. 216 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 217 OpenMPDirectiveKind Kind) 218 : CGF(CGF) { 219 // Start emission for the construct. 220 CGF.CapturedStmtInfo = 221 new CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, CodeGen, Kind); 222 } 223 ~InlinedOpenMPRegionRAII() { 224 // Restore original CapturedStmtInfo only if we're done with code emission. 225 auto *OldCSI = 226 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 227 delete CGF.CapturedStmtInfo; 228 CGF.CapturedStmtInfo = OldCSI; 229 } 230 }; 231 232 } // namespace 233 234 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 235 return CGF.MakeNaturalAlignAddrLValue( 236 CGF.Builder.CreateAlignedLoad( 237 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 238 CGF.PointerAlignInBytes), 239 getThreadIDVariable() 240 ->getType() 241 ->castAs<PointerType>() 242 ->getPointeeType()); 243 } 244 245 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 246 // 1.2.2 OpenMP Language Terminology 247 // Structured block - An executable statement with a single entry at the 248 // top and a single exit at the bottom. 249 // The point of exit cannot be a branch out of the structured block. 250 // longjmp() and throw() must not violate the entry/exit criteria. 251 CGF.EHStack.pushTerminate(); 252 { 253 CodeGenFunction::RunCleanupsScope Scope(CGF); 254 CodeGen(CGF); 255 } 256 CGF.EHStack.popTerminate(); 257 } 258 259 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 260 CodeGenFunction &CGF) { 261 return CGF.MakeNaturalAlignAddrLValue( 262 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 263 getThreadIDVariable()->getType()); 264 } 265 266 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) 267 : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) { 268 IdentTy = llvm::StructType::create( 269 "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, 270 CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, 271 CGM.Int8PtrTy /* psource */, nullptr); 272 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 273 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 274 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 275 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 276 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 277 } 278 279 void CGOpenMPRuntime::clear() { 280 InternalVars.clear(); 281 } 282 283 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( 284 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 285 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 286 assert(ThreadIDVar->getType()->isPointerType() && 287 "thread id variable must be of type kmp_int32 *"); 288 const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 289 CodeGenFunction CGF(CGM, true); 290 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind); 291 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 292 return CGF.GenerateCapturedStmtFunction(*CS); 293 } 294 295 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( 296 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 297 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 298 assert(!ThreadIDVar->getType()->isPointerType() && 299 "thread id variable must be of type kmp_int32 for tasks"); 300 auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 301 CodeGenFunction CGF(CGM, true); 302 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 303 InnermostKind); 304 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 305 return CGF.GenerateCapturedStmtFunction(*CS); 306 } 307 308 llvm::Value * 309 CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { 310 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); 311 if (!Entry) { 312 if (!DefaultOpenMPPSource) { 313 // Initialize default location for psource field of ident_t structure of 314 // all ident_t objects. Format is ";file;function;line;column;;". 315 // Taken from 316 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c 317 DefaultOpenMPPSource = 318 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;"); 319 DefaultOpenMPPSource = 320 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 321 } 322 auto DefaultOpenMPLocation = new llvm::GlobalVariable( 323 CGM.getModule(), IdentTy, /*isConstant*/ true, 324 llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr); 325 DefaultOpenMPLocation->setUnnamedAddr(true); 326 327 llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true); 328 llvm::Constant *Values[] = {Zero, 329 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 330 Zero, Zero, DefaultOpenMPPSource}; 331 llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values); 332 DefaultOpenMPLocation->setInitializer(Init); 333 OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation; 334 return DefaultOpenMPLocation; 335 } 336 return Entry; 337 } 338 339 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 340 SourceLocation Loc, 341 OpenMPLocationFlags Flags) { 342 // If no debug info is generated - return global default location. 343 if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo || 344 Loc.isInvalid()) 345 return getOrCreateDefaultLocation(Flags); 346 347 assert(CGF.CurFn && "No function in current CodeGenFunction."); 348 349 llvm::Value *LocValue = nullptr; 350 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 351 if (I != OpenMPLocThreadIDMap.end()) 352 LocValue = I->second.DebugLoc; 353 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 354 // GetOpenMPThreadID was called before this routine. 355 if (LocValue == nullptr) { 356 // Generate "ident_t .kmpc_loc.addr;" 357 llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr"); 358 AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy)); 359 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 360 Elem.second.DebugLoc = AI; 361 LocValue = AI; 362 363 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 364 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 365 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 366 llvm::ConstantExpr::getSizeOf(IdentTy), 367 CGM.PointerAlignInBytes); 368 } 369 370 // char **psource = &.kmpc_loc_<flags>.addr.psource; 371 auto *PSource = CGF.Builder.CreateConstInBoundsGEP2_32(IdentTy, LocValue, 0, 372 IdentField_PSource); 373 374 auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 375 if (OMPDebugLoc == nullptr) { 376 SmallString<128> Buffer2; 377 llvm::raw_svector_ostream OS2(Buffer2); 378 // Build debug location 379 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 380 OS2 << ";" << PLoc.getFilename() << ";"; 381 if (const FunctionDecl *FD = 382 dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) { 383 OS2 << FD->getQualifiedNameAsString(); 384 } 385 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 386 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 387 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 388 } 389 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 390 CGF.Builder.CreateStore(OMPDebugLoc, PSource); 391 392 return LocValue; 393 } 394 395 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 396 SourceLocation Loc) { 397 assert(CGF.CurFn && "No function in current CodeGenFunction."); 398 399 llvm::Value *ThreadID = nullptr; 400 // Check whether we've already cached a load of the thread id in this 401 // function. 402 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 403 if (I != OpenMPLocThreadIDMap.end()) { 404 ThreadID = I->second.ThreadID; 405 if (ThreadID != nullptr) 406 return ThreadID; 407 } 408 if (auto OMPRegionInfo = 409 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 410 if (OMPRegionInfo->getThreadIDVariable()) { 411 // Check if this an outlined function with thread id passed as argument. 412 auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 413 ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); 414 // If value loaded in entry block, cache it and use it everywhere in 415 // function. 416 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 417 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 418 Elem.second.ThreadID = ThreadID; 419 } 420 return ThreadID; 421 } 422 } 423 424 // This is not an outlined function region - need to call __kmpc_int32 425 // kmpc_global_thread_num(ident_t *loc). 426 // Generate thread id value and cache this value for use across the 427 // function. 428 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 429 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 430 ThreadID = 431 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 432 emitUpdateLocation(CGF, Loc)); 433 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 434 Elem.second.ThreadID = ThreadID; 435 return ThreadID; 436 } 437 438 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 439 assert(CGF.CurFn && "No function in current CodeGenFunction."); 440 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 441 OpenMPLocThreadIDMap.erase(CGF.CurFn); 442 } 443 444 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 445 return llvm::PointerType::getUnqual(IdentTy); 446 } 447 448 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 449 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 450 } 451 452 llvm::Constant * 453 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { 454 llvm::Constant *RTLFn = nullptr; 455 switch (Function) { 456 case OMPRTL__kmpc_fork_call: { 457 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 458 // microtask, ...); 459 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 460 getKmpc_MicroPointerTy()}; 461 llvm::FunctionType *FnTy = 462 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 463 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 464 break; 465 } 466 case OMPRTL__kmpc_global_thread_num: { 467 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 468 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 469 llvm::FunctionType *FnTy = 470 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 471 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 472 break; 473 } 474 case OMPRTL__kmpc_threadprivate_cached: { 475 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 476 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 477 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 478 CGM.VoidPtrTy, CGM.SizeTy, 479 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 480 llvm::FunctionType *FnTy = 481 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 482 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 483 break; 484 } 485 case OMPRTL__kmpc_critical: { 486 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 487 // kmp_critical_name *crit); 488 llvm::Type *TypeParams[] = { 489 getIdentTyPointerTy(), CGM.Int32Ty, 490 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 491 llvm::FunctionType *FnTy = 492 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 493 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 494 break; 495 } 496 case OMPRTL__kmpc_threadprivate_register: { 497 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 498 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 499 // typedef void *(*kmpc_ctor)(void *); 500 auto KmpcCtorTy = 501 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 502 /*isVarArg*/ false)->getPointerTo(); 503 // typedef void *(*kmpc_cctor)(void *, void *); 504 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 505 auto KmpcCopyCtorTy = 506 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 507 /*isVarArg*/ false)->getPointerTo(); 508 // typedef void (*kmpc_dtor)(void *); 509 auto KmpcDtorTy = 510 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 511 ->getPointerTo(); 512 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 513 KmpcCopyCtorTy, KmpcDtorTy}; 514 auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 515 /*isVarArg*/ false); 516 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 517 break; 518 } 519 case OMPRTL__kmpc_end_critical: { 520 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 521 // kmp_critical_name *crit); 522 llvm::Type *TypeParams[] = { 523 getIdentTyPointerTy(), CGM.Int32Ty, 524 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 525 llvm::FunctionType *FnTy = 526 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 527 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 528 break; 529 } 530 case OMPRTL__kmpc_cancel_barrier: { 531 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 532 // global_tid); 533 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 534 llvm::FunctionType *FnTy = 535 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 536 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 537 break; 538 } 539 case OMPRTL__kmpc_barrier: { 540 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 541 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 542 llvm::FunctionType *FnTy = 543 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 544 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 545 break; 546 } 547 case OMPRTL__kmpc_for_static_fini: { 548 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 549 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 550 llvm::FunctionType *FnTy = 551 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 552 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 553 break; 554 } 555 case OMPRTL__kmpc_push_num_threads: { 556 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 557 // kmp_int32 num_threads) 558 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 559 CGM.Int32Ty}; 560 llvm::FunctionType *FnTy = 561 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 562 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 563 break; 564 } 565 case OMPRTL__kmpc_serialized_parallel: { 566 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 567 // global_tid); 568 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 569 llvm::FunctionType *FnTy = 570 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 571 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 572 break; 573 } 574 case OMPRTL__kmpc_end_serialized_parallel: { 575 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 576 // global_tid); 577 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 578 llvm::FunctionType *FnTy = 579 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 580 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 581 break; 582 } 583 case OMPRTL__kmpc_flush: { 584 // Build void __kmpc_flush(ident_t *loc); 585 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 586 llvm::FunctionType *FnTy = 587 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 588 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 589 break; 590 } 591 case OMPRTL__kmpc_master: { 592 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 593 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 594 llvm::FunctionType *FnTy = 595 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 596 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 597 break; 598 } 599 case OMPRTL__kmpc_end_master: { 600 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 601 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 602 llvm::FunctionType *FnTy = 603 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 604 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 605 break; 606 } 607 case OMPRTL__kmpc_omp_taskyield: { 608 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 609 // int end_part); 610 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 611 llvm::FunctionType *FnTy = 612 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 613 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 614 break; 615 } 616 case OMPRTL__kmpc_single: { 617 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 618 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 619 llvm::FunctionType *FnTy = 620 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 621 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 622 break; 623 } 624 case OMPRTL__kmpc_end_single: { 625 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 626 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 627 llvm::FunctionType *FnTy = 628 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 629 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 630 break; 631 } 632 case OMPRTL__kmpc_omp_task_alloc: { 633 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 634 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 635 // kmp_routine_entry_t *task_entry); 636 assert(KmpRoutineEntryPtrTy != nullptr && 637 "Type kmp_routine_entry_t must be created."); 638 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 639 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 640 // Return void * and then cast to particular kmp_task_t type. 641 llvm::FunctionType *FnTy = 642 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 643 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 644 break; 645 } 646 case OMPRTL__kmpc_omp_task: { 647 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 648 // *new_task); 649 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 650 CGM.VoidPtrTy}; 651 llvm::FunctionType *FnTy = 652 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 653 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 654 break; 655 } 656 case OMPRTL__kmpc_copyprivate: { 657 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 658 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 659 // kmp_int32 didit); 660 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 661 auto *CpyFnTy = 662 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 663 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 664 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 665 CGM.Int32Ty}; 666 llvm::FunctionType *FnTy = 667 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 668 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 669 break; 670 } 671 case OMPRTL__kmpc_reduce: { 672 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 673 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 674 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 675 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 676 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 677 /*isVarArg=*/false); 678 llvm::Type *TypeParams[] = { 679 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 680 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 681 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 682 llvm::FunctionType *FnTy = 683 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 684 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 685 break; 686 } 687 case OMPRTL__kmpc_reduce_nowait: { 688 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 689 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 690 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 691 // *lck); 692 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 693 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 694 /*isVarArg=*/false); 695 llvm::Type *TypeParams[] = { 696 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 697 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 698 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 699 llvm::FunctionType *FnTy = 700 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 701 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 702 break; 703 } 704 case OMPRTL__kmpc_end_reduce: { 705 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 706 // kmp_critical_name *lck); 707 llvm::Type *TypeParams[] = { 708 getIdentTyPointerTy(), CGM.Int32Ty, 709 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 710 llvm::FunctionType *FnTy = 711 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 712 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 713 break; 714 } 715 case OMPRTL__kmpc_end_reduce_nowait: { 716 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 717 // kmp_critical_name *lck); 718 llvm::Type *TypeParams[] = { 719 getIdentTyPointerTy(), CGM.Int32Ty, 720 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 721 llvm::FunctionType *FnTy = 722 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 723 RTLFn = 724 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 725 break; 726 } 727 case OMPRTL__kmpc_omp_task_begin_if0: { 728 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 729 // *new_task); 730 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 731 CGM.VoidPtrTy}; 732 llvm::FunctionType *FnTy = 733 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 734 RTLFn = 735 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 736 break; 737 } 738 case OMPRTL__kmpc_omp_task_complete_if0: { 739 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 740 // *new_task); 741 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 742 CGM.VoidPtrTy}; 743 llvm::FunctionType *FnTy = 744 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 745 RTLFn = CGM.CreateRuntimeFunction(FnTy, 746 /*Name=*/"__kmpc_omp_task_complete_if0"); 747 break; 748 } 749 case OMPRTL__kmpc_ordered: { 750 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 751 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 752 llvm::FunctionType *FnTy = 753 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 754 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 755 break; 756 } 757 case OMPRTL__kmpc_end_ordered: { 758 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 759 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 760 llvm::FunctionType *FnTy = 761 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 762 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 763 break; 764 } 765 case OMPRTL__kmpc_omp_taskwait: { 766 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 767 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 768 llvm::FunctionType *FnTy = 769 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 770 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 771 break; 772 } 773 case OMPRTL__kmpc_taskgroup: { 774 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 775 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 776 llvm::FunctionType *FnTy = 777 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 778 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 779 break; 780 } 781 case OMPRTL__kmpc_end_taskgroup: { 782 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 783 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 784 llvm::FunctionType *FnTy = 785 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 786 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 787 break; 788 } 789 case OMPRTL__kmpc_push_proc_bind: { 790 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 791 // int proc_bind) 792 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 793 llvm::FunctionType *FnTy = 794 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 795 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 796 break; 797 } 798 case OMPRTL__kmpc_omp_task_with_deps: { 799 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 800 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 801 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 802 llvm::Type *TypeParams[] = { 803 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 804 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 805 llvm::FunctionType *FnTy = 806 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 807 RTLFn = 808 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 809 break; 810 } 811 case OMPRTL__kmpc_omp_wait_deps: { 812 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 813 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 814 // kmp_depend_info_t *noalias_dep_list); 815 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 816 CGM.Int32Ty, CGM.VoidPtrTy, 817 CGM.Int32Ty, CGM.VoidPtrTy}; 818 llvm::FunctionType *FnTy = 819 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 820 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 821 break; 822 } 823 case OMPRTL__kmpc_cancellationpoint: { 824 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 825 // global_tid, kmp_int32 cncl_kind) 826 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 827 llvm::FunctionType *FnTy = 828 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 829 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 830 break; 831 } 832 case OMPRTL__kmpc_cancel: { 833 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 834 // kmp_int32 cncl_kind) 835 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 836 llvm::FunctionType *FnTy = 837 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 838 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 839 break; 840 } 841 } 842 return RTLFn; 843 } 844 845 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, 846 bool IVSigned) { 847 assert((IVSize == 32 || IVSize == 64) && 848 "IV size is not compatible with the omp runtime"); 849 auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 850 : "__kmpc_for_static_init_4u") 851 : (IVSigned ? "__kmpc_for_static_init_8" 852 : "__kmpc_for_static_init_8u"); 853 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 854 auto PtrTy = llvm::PointerType::getUnqual(ITy); 855 llvm::Type *TypeParams[] = { 856 getIdentTyPointerTy(), // loc 857 CGM.Int32Ty, // tid 858 CGM.Int32Ty, // schedtype 859 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 860 PtrTy, // p_lower 861 PtrTy, // p_upper 862 PtrTy, // p_stride 863 ITy, // incr 864 ITy // chunk 865 }; 866 llvm::FunctionType *FnTy = 867 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 868 return CGM.CreateRuntimeFunction(FnTy, Name); 869 } 870 871 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, 872 bool IVSigned) { 873 assert((IVSize == 32 || IVSize == 64) && 874 "IV size is not compatible with the omp runtime"); 875 auto Name = 876 IVSize == 32 877 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 878 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 879 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 880 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 881 CGM.Int32Ty, // tid 882 CGM.Int32Ty, // schedtype 883 ITy, // lower 884 ITy, // upper 885 ITy, // stride 886 ITy // chunk 887 }; 888 llvm::FunctionType *FnTy = 889 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 890 return CGM.CreateRuntimeFunction(FnTy, Name); 891 } 892 893 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, 894 bool IVSigned) { 895 assert((IVSize == 32 || IVSize == 64) && 896 "IV size is not compatible with the omp runtime"); 897 auto Name = 898 IVSize == 32 899 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 900 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 901 llvm::Type *TypeParams[] = { 902 getIdentTyPointerTy(), // loc 903 CGM.Int32Ty, // tid 904 }; 905 llvm::FunctionType *FnTy = 906 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 907 return CGM.CreateRuntimeFunction(FnTy, Name); 908 } 909 910 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, 911 bool IVSigned) { 912 assert((IVSize == 32 || IVSize == 64) && 913 "IV size is not compatible with the omp runtime"); 914 auto Name = 915 IVSize == 32 916 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 917 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 918 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 919 auto PtrTy = llvm::PointerType::getUnqual(ITy); 920 llvm::Type *TypeParams[] = { 921 getIdentTyPointerTy(), // loc 922 CGM.Int32Ty, // tid 923 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 924 PtrTy, // p_lower 925 PtrTy, // p_upper 926 PtrTy // p_stride 927 }; 928 llvm::FunctionType *FnTy = 929 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 930 return CGM.CreateRuntimeFunction(FnTy, Name); 931 } 932 933 llvm::Constant * 934 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 935 assert(!CGM.getLangOpts().OpenMPUseTLS || 936 !CGM.getContext().getTargetInfo().isTLSSupported()); 937 // Lookup the entry, lazily creating it if necessary. 938 return getOrCreateInternalVariable(CGM.Int8PtrPtrTy, 939 Twine(CGM.getMangledName(VD)) + ".cache."); 940 } 941 942 llvm::Value *CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 943 const VarDecl *VD, 944 llvm::Value *VDAddr, 945 SourceLocation Loc) { 946 if (CGM.getLangOpts().OpenMPUseTLS && 947 CGM.getContext().getTargetInfo().isTLSSupported()) 948 return VDAddr; 949 950 auto VarTy = VDAddr->getType()->getPointerElementType(); 951 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 952 CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy), 953 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 954 getOrCreateThreadPrivateCache(VD)}; 955 return CGF.EmitRuntimeCall( 956 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args); 957 } 958 959 void CGOpenMPRuntime::emitThreadPrivateVarInit( 960 CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor, 961 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 962 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 963 // library. 964 auto OMPLoc = emitUpdateLocation(CGF, Loc); 965 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 966 OMPLoc); 967 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 968 // to register constructor/destructor for variable. 969 llvm::Value *Args[] = {OMPLoc, 970 CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy), 971 Ctor, CopyCtor, Dtor}; 972 CGF.EmitRuntimeCall( 973 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 974 } 975 976 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 977 const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc, 978 bool PerformInit, CodeGenFunction *CGF) { 979 if (CGM.getLangOpts().OpenMPUseTLS && 980 CGM.getContext().getTargetInfo().isTLSSupported()) 981 return nullptr; 982 983 VD = VD->getDefinition(CGM.getContext()); 984 if (VD && ThreadPrivateWithDefinition.count(VD) == 0) { 985 ThreadPrivateWithDefinition.insert(VD); 986 QualType ASTTy = VD->getType(); 987 988 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 989 auto Init = VD->getAnyInitializer(); 990 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 991 // Generate function that re-emits the declaration's initializer into the 992 // threadprivate copy of the variable VD 993 CodeGenFunction CtorCGF(CGM); 994 FunctionArgList Args; 995 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 996 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 997 Args.push_back(&Dst); 998 999 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( 1000 CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(), 1001 /*isVariadic=*/false); 1002 auto FTy = CGM.getTypes().GetFunctionType(FI); 1003 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 1004 FTy, ".__kmpc_global_ctor_.", Loc); 1005 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1006 Args, SourceLocation()); 1007 auto ArgVal = CtorCGF.EmitLoadOfScalar( 1008 CtorCGF.GetAddrOfLocalVar(&Dst), 1009 /*Volatile=*/false, CGM.PointerAlignInBytes, 1010 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1011 auto Arg = CtorCGF.Builder.CreatePointerCast( 1012 ArgVal, 1013 CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy))); 1014 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1015 /*IsInitializer=*/true); 1016 ArgVal = CtorCGF.EmitLoadOfScalar( 1017 CtorCGF.GetAddrOfLocalVar(&Dst), 1018 /*Volatile=*/false, CGM.PointerAlignInBytes, 1019 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1020 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1021 CtorCGF.FinishFunction(); 1022 Ctor = Fn; 1023 } 1024 if (VD->getType().isDestructedType() != QualType::DK_none) { 1025 // Generate function that emits destructor call for the threadprivate copy 1026 // of the variable VD 1027 CodeGenFunction DtorCGF(CGM); 1028 FunctionArgList Args; 1029 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 1030 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 1031 Args.push_back(&Dst); 1032 1033 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( 1034 CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(), 1035 /*isVariadic=*/false); 1036 auto FTy = CGM.getTypes().GetFunctionType(FI); 1037 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 1038 FTy, ".__kmpc_global_dtor_.", Loc); 1039 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1040 SourceLocation()); 1041 auto ArgVal = DtorCGF.EmitLoadOfScalar( 1042 DtorCGF.GetAddrOfLocalVar(&Dst), 1043 /*Volatile=*/false, CGM.PointerAlignInBytes, 1044 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1045 DtorCGF.emitDestroy(ArgVal, ASTTy, 1046 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1047 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1048 DtorCGF.FinishFunction(); 1049 Dtor = Fn; 1050 } 1051 // Do not emit init function if it is not required. 1052 if (!Ctor && !Dtor) 1053 return nullptr; 1054 1055 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1056 auto CopyCtorTy = 1057 llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1058 /*isVarArg=*/false)->getPointerTo(); 1059 // Copying constructor for the threadprivate variable. 1060 // Must be NULL - reserved by runtime, but currently it requires that this 1061 // parameter is always NULL. Otherwise it fires assertion. 1062 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1063 if (Ctor == nullptr) { 1064 auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1065 /*isVarArg=*/false)->getPointerTo(); 1066 Ctor = llvm::Constant::getNullValue(CtorTy); 1067 } 1068 if (Dtor == nullptr) { 1069 auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1070 /*isVarArg=*/false)->getPointerTo(); 1071 Dtor = llvm::Constant::getNullValue(DtorTy); 1072 } 1073 if (!CGF) { 1074 auto InitFunctionTy = 1075 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1076 auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( 1077 InitFunctionTy, ".__omp_threadprivate_init_."); 1078 CodeGenFunction InitCGF(CGM); 1079 FunctionArgList ArgList; 1080 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1081 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1082 Loc); 1083 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1084 InitCGF.FinishFunction(); 1085 return InitFunction; 1086 } 1087 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1088 } 1089 return nullptr; 1090 } 1091 1092 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen 1093 /// function. Here is the logic: 1094 /// if (Cond) { 1095 /// ThenGen(); 1096 /// } else { 1097 /// ElseGen(); 1098 /// } 1099 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 1100 const RegionCodeGenTy &ThenGen, 1101 const RegionCodeGenTy &ElseGen) { 1102 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 1103 1104 // If the condition constant folds and can be elided, try to avoid emitting 1105 // the condition and the dead arm of the if/else. 1106 bool CondConstant; 1107 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 1108 CodeGenFunction::RunCleanupsScope Scope(CGF); 1109 if (CondConstant) { 1110 ThenGen(CGF); 1111 } else { 1112 ElseGen(CGF); 1113 } 1114 return; 1115 } 1116 1117 // Otherwise, the condition did not fold, or we couldn't elide it. Just 1118 // emit the conditional branch. 1119 auto ThenBlock = CGF.createBasicBlock("omp_if.then"); 1120 auto ElseBlock = CGF.createBasicBlock("omp_if.else"); 1121 auto ContBlock = CGF.createBasicBlock("omp_if.end"); 1122 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 1123 1124 // Emit the 'then' code. 1125 CGF.EmitBlock(ThenBlock); 1126 { 1127 CodeGenFunction::RunCleanupsScope ThenScope(CGF); 1128 ThenGen(CGF); 1129 } 1130 CGF.EmitBranch(ContBlock); 1131 // Emit the 'else' code if present. 1132 { 1133 // There is no need to emit line number for unconditional branch. 1134 auto NL = ApplyDebugLocation::CreateEmpty(CGF); 1135 CGF.EmitBlock(ElseBlock); 1136 } 1137 { 1138 CodeGenFunction::RunCleanupsScope ThenScope(CGF); 1139 ElseGen(CGF); 1140 } 1141 { 1142 // There is no need to emit line number for unconditional branch. 1143 auto NL = ApplyDebugLocation::CreateEmpty(CGF); 1144 CGF.EmitBranch(ContBlock); 1145 } 1146 // Emit the continuation block for code after the if. 1147 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 1148 } 1149 1150 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 1151 llvm::Value *OutlinedFn, 1152 llvm::Value *CapturedStruct, 1153 const Expr *IfCond) { 1154 auto *RTLoc = emitUpdateLocation(CGF, Loc); 1155 auto &&ThenGen = 1156 [this, OutlinedFn, CapturedStruct, RTLoc](CodeGenFunction &CGF) { 1157 // Build call __kmpc_fork_call(loc, 1, microtask, 1158 // captured_struct/*context*/) 1159 llvm::Value *Args[] = { 1160 RTLoc, 1161 CGF.Builder.getInt32( 1162 1), // Number of arguments after 'microtask' argument 1163 // (there is only one additional argument - 'context') 1164 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()), 1165 CGF.EmitCastToVoidPtr(CapturedStruct)}; 1166 auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call); 1167 CGF.EmitRuntimeCall(RTLFn, Args); 1168 }; 1169 auto &&ElseGen = [this, OutlinedFn, CapturedStruct, RTLoc, Loc]( 1170 CodeGenFunction &CGF) { 1171 auto ThreadID = getThreadID(CGF, Loc); 1172 // Build calls: 1173 // __kmpc_serialized_parallel(&Loc, GTid); 1174 llvm::Value *Args[] = {RTLoc, ThreadID}; 1175 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), 1176 Args); 1177 1178 // OutlinedFn(>id, &zero, CapturedStruct); 1179 auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc); 1180 auto Int32Ty = CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, 1181 /*Signed*/ true); 1182 auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr"); 1183 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 1184 llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct}; 1185 CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); 1186 1187 // __kmpc_end_serialized_parallel(&Loc, GTid); 1188 llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID}; 1189 CGF.EmitRuntimeCall( 1190 createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs); 1191 }; 1192 if (IfCond) { 1193 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 1194 } else { 1195 CodeGenFunction::RunCleanupsScope Scope(CGF); 1196 ThenGen(CGF); 1197 } 1198 } 1199 1200 // If we're inside an (outlined) parallel region, use the region info's 1201 // thread-ID variable (it is passed in a first argument of the outlined function 1202 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 1203 // regular serial code region, get thread ID by calling kmp_int32 1204 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 1205 // return the address of that temp. 1206 llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 1207 SourceLocation Loc) { 1208 if (auto OMPRegionInfo = 1209 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 1210 if (OMPRegionInfo->getThreadIDVariable()) 1211 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 1212 1213 auto ThreadID = getThreadID(CGF, Loc); 1214 auto Int32Ty = 1215 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 1216 auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 1217 CGF.EmitStoreOfScalar(ThreadID, 1218 CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty)); 1219 1220 return ThreadIDTemp; 1221 } 1222 1223 llvm::Constant * 1224 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, 1225 const llvm::Twine &Name) { 1226 SmallString<256> Buffer; 1227 llvm::raw_svector_ostream Out(Buffer); 1228 Out << Name; 1229 auto RuntimeName = Out.str(); 1230 auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first; 1231 if (Elem.second) { 1232 assert(Elem.second->getType()->getPointerElementType() == Ty && 1233 "OMP internal variable has different type than requested"); 1234 return &*Elem.second; 1235 } 1236 1237 return Elem.second = new llvm::GlobalVariable( 1238 CGM.getModule(), Ty, /*IsConstant*/ false, 1239 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 1240 Elem.first()); 1241 } 1242 1243 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 1244 llvm::Twine Name(".gomp_critical_user_", CriticalName); 1245 return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); 1246 } 1247 1248 namespace { 1249 template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup { 1250 llvm::Value *Callee; 1251 llvm::Value *Args[N]; 1252 1253 public: 1254 CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs) 1255 : Callee(Callee) { 1256 assert(CleanupArgs.size() == N); 1257 std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args)); 1258 } 1259 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 1260 CGF.EmitRuntimeCall(Callee, Args); 1261 } 1262 }; 1263 } // namespace 1264 1265 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 1266 StringRef CriticalName, 1267 const RegionCodeGenTy &CriticalOpGen, 1268 SourceLocation Loc) { 1269 // __kmpc_critical(ident_t *, gtid, Lock); 1270 // CriticalOpGen(); 1271 // __kmpc_end_critical(ident_t *, gtid, Lock); 1272 // Prepare arguments and build a call to __kmpc_critical 1273 { 1274 CodeGenFunction::RunCleanupsScope Scope(CGF); 1275 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1276 getCriticalRegionLock(CriticalName)}; 1277 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args); 1278 // Build a call to __kmpc_end_critical 1279 CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( 1280 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical), 1281 llvm::makeArrayRef(Args)); 1282 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 1283 } 1284 } 1285 1286 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond, 1287 OpenMPDirectiveKind Kind, SourceLocation Loc, 1288 const RegionCodeGenTy &BodyOpGen) { 1289 llvm::Value *CallBool = CGF.EmitScalarConversion( 1290 IfCond, 1291 CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true), 1292 CGF.getContext().BoolTy, Loc); 1293 1294 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 1295 auto *ContBlock = CGF.createBasicBlock("omp_if.end"); 1296 // Generate the branch (If-stmt) 1297 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 1298 CGF.EmitBlock(ThenBlock); 1299 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen); 1300 // Emit the rest of bblocks/branches 1301 CGF.EmitBranch(ContBlock); 1302 CGF.EmitBlock(ContBlock, true); 1303 } 1304 1305 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 1306 const RegionCodeGenTy &MasterOpGen, 1307 SourceLocation Loc) { 1308 // if(__kmpc_master(ident_t *, gtid)) { 1309 // MasterOpGen(); 1310 // __kmpc_end_master(ident_t *, gtid); 1311 // } 1312 // Prepare arguments and build a call to __kmpc_master 1313 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1314 auto *IsMaster = 1315 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args); 1316 typedef CallEndCleanup<std::extent<decltype(Args)>::value> 1317 MasterCallEndCleanup; 1318 emitIfStmt( 1319 CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void { 1320 CodeGenFunction::RunCleanupsScope Scope(CGF); 1321 CGF.EHStack.pushCleanup<MasterCallEndCleanup>( 1322 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master), 1323 llvm::makeArrayRef(Args)); 1324 MasterOpGen(CGF); 1325 }); 1326 } 1327 1328 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 1329 SourceLocation Loc) { 1330 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 1331 llvm::Value *Args[] = { 1332 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1333 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 1334 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 1335 } 1336 1337 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 1338 const RegionCodeGenTy &TaskgroupOpGen, 1339 SourceLocation Loc) { 1340 // __kmpc_taskgroup(ident_t *, gtid); 1341 // TaskgroupOpGen(); 1342 // __kmpc_end_taskgroup(ident_t *, gtid); 1343 // Prepare arguments and build a call to __kmpc_taskgroup 1344 { 1345 CodeGenFunction::RunCleanupsScope Scope(CGF); 1346 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1347 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args); 1348 // Build a call to __kmpc_end_taskgroup 1349 CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( 1350 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 1351 llvm::makeArrayRef(Args)); 1352 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 1353 } 1354 } 1355 1356 static llvm::Value *emitCopyprivateCopyFunction( 1357 CodeGenModule &CGM, llvm::Type *ArgsType, 1358 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 1359 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) { 1360 auto &C = CGM.getContext(); 1361 // void copy_func(void *LHSArg, void *RHSArg); 1362 FunctionArgList Args; 1363 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 1364 C.VoidPtrTy); 1365 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 1366 C.VoidPtrTy); 1367 Args.push_back(&LHSArg); 1368 Args.push_back(&RHSArg); 1369 FunctionType::ExtInfo EI; 1370 auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration( 1371 C.VoidTy, Args, EI, /*isVariadic=*/false); 1372 auto *Fn = llvm::Function::Create( 1373 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 1374 ".omp.copyprivate.copy_func", &CGM.getModule()); 1375 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn); 1376 CodeGenFunction CGF(CGM); 1377 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 1378 // Dest = (void*[n])(LHSArg); 1379 // Src = (void*[n])(RHSArg); 1380 auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1381 CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg), 1382 CGF.PointerAlignInBytes), 1383 ArgsType); 1384 auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1385 CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg), 1386 CGF.PointerAlignInBytes), 1387 ArgsType); 1388 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 1389 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 1390 // ... 1391 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 1392 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 1393 auto *DestAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1394 CGF.Builder.CreateAlignedLoad( 1395 CGF.Builder.CreateStructGEP(nullptr, LHS, I), 1396 CGM.PointerAlignInBytes), 1397 CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType()))); 1398 auto *SrcAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1399 CGF.Builder.CreateAlignedLoad( 1400 CGF.Builder.CreateStructGEP(nullptr, RHS, I), 1401 CGM.PointerAlignInBytes), 1402 CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType()))); 1403 auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 1404 QualType Type = VD->getType(); 1405 CGF.EmitOMPCopy(CGF, Type, DestAddr, SrcAddr, 1406 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()), 1407 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()), 1408 AssignmentOps[I]); 1409 } 1410 CGF.FinishFunction(); 1411 return Fn; 1412 } 1413 1414 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 1415 const RegionCodeGenTy &SingleOpGen, 1416 SourceLocation Loc, 1417 ArrayRef<const Expr *> CopyprivateVars, 1418 ArrayRef<const Expr *> SrcExprs, 1419 ArrayRef<const Expr *> DstExprs, 1420 ArrayRef<const Expr *> AssignmentOps) { 1421 assert(CopyprivateVars.size() == SrcExprs.size() && 1422 CopyprivateVars.size() == DstExprs.size() && 1423 CopyprivateVars.size() == AssignmentOps.size()); 1424 auto &C = CGM.getContext(); 1425 // int32 did_it = 0; 1426 // if(__kmpc_single(ident_t *, gtid)) { 1427 // SingleOpGen(); 1428 // __kmpc_end_single(ident_t *, gtid); 1429 // did_it = 1; 1430 // } 1431 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 1432 // <copy_func>, did_it); 1433 1434 llvm::AllocaInst *DidIt = nullptr; 1435 if (!CopyprivateVars.empty()) { 1436 // int32 did_it = 0; 1437 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1438 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 1439 CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(0), DidIt, 1440 DidIt->getAlignment()); 1441 } 1442 // Prepare arguments and build a call to __kmpc_single 1443 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1444 auto *IsSingle = 1445 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args); 1446 typedef CallEndCleanup<std::extent<decltype(Args)>::value> 1447 SingleCallEndCleanup; 1448 emitIfStmt( 1449 CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void { 1450 CodeGenFunction::RunCleanupsScope Scope(CGF); 1451 CGF.EHStack.pushCleanup<SingleCallEndCleanup>( 1452 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single), 1453 llvm::makeArrayRef(Args)); 1454 SingleOpGen(CGF); 1455 if (DidIt) { 1456 // did_it = 1; 1457 CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt, 1458 DidIt->getAlignment()); 1459 } 1460 }); 1461 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 1462 // <copy_func>, did_it); 1463 if (DidIt) { 1464 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 1465 auto CopyprivateArrayTy = 1466 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 1467 /*IndexTypeQuals=*/0); 1468 // Create a list of all private variables for copyprivate. 1469 auto *CopyprivateList = 1470 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 1471 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 1472 auto *Elem = CGF.Builder.CreateStructGEP( 1473 CopyprivateList->getAllocatedType(), CopyprivateList, I); 1474 CGF.Builder.CreateAlignedStore( 1475 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1476 CGF.EmitLValue(CopyprivateVars[I]).getAddress(), CGF.VoidPtrTy), 1477 Elem, CGM.PointerAlignInBytes); 1478 } 1479 // Build function that copies private values from single region to all other 1480 // threads in the corresponding parallel region. 1481 auto *CpyFn = emitCopyprivateCopyFunction( 1482 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 1483 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps); 1484 auto *BufSize = llvm::ConstantInt::get( 1485 CGM.SizeTy, C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity()); 1486 auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 1487 CGF.VoidPtrTy); 1488 auto *DidItVal = 1489 CGF.Builder.CreateAlignedLoad(DidIt, CGF.PointerAlignInBytes); 1490 llvm::Value *Args[] = { 1491 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 1492 getThreadID(CGF, Loc), // i32 <gtid> 1493 BufSize, // size_t <buf_size> 1494 CL, // void *<copyprivate list> 1495 CpyFn, // void (*) (void *, void *) <copy_func> 1496 DidItVal // i32 did_it 1497 }; 1498 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 1499 } 1500 } 1501 1502 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 1503 const RegionCodeGenTy &OrderedOpGen, 1504 SourceLocation Loc) { 1505 // __kmpc_ordered(ident_t *, gtid); 1506 // OrderedOpGen(); 1507 // __kmpc_end_ordered(ident_t *, gtid); 1508 // Prepare arguments and build a call to __kmpc_ordered 1509 { 1510 CodeGenFunction::RunCleanupsScope Scope(CGF); 1511 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1512 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args); 1513 // Build a call to __kmpc_end_ordered 1514 CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( 1515 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered), 1516 llvm::makeArrayRef(Args)); 1517 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 1518 } 1519 } 1520 1521 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 1522 OpenMPDirectiveKind Kind, 1523 bool CheckForCancel) { 1524 // Build call __kmpc_cancel_barrier(loc, thread_id); 1525 // Build call __kmpc_barrier(loc, thread_id); 1526 OpenMPLocationFlags Flags = OMP_IDENT_KMPC; 1527 if (Kind == OMPD_for) { 1528 Flags = 1529 static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR); 1530 } else if (Kind == OMPD_sections) { 1531 Flags = static_cast<OpenMPLocationFlags>(Flags | 1532 OMP_IDENT_BARRIER_IMPL_SECTIONS); 1533 } else if (Kind == OMPD_single) { 1534 Flags = 1535 static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE); 1536 } else if (Kind == OMPD_barrier) { 1537 Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL); 1538 } else { 1539 Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL); 1540 } 1541 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 1542 // thread_id); 1543 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 1544 getThreadID(CGF, Loc)}; 1545 if (auto *OMPRegionInfo = 1546 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1547 auto CancelDestination = 1548 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 1549 if (CancelDestination.isValid()) { 1550 auto *Result = CGF.EmitRuntimeCall( 1551 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 1552 if (CheckForCancel) { 1553 // if (__kmpc_cancel_barrier()) { 1554 // exit from construct; 1555 // } 1556 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 1557 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 1558 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 1559 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 1560 CGF.EmitBlock(ExitBB); 1561 // exit from construct; 1562 CGF.EmitBranchThroughCleanup(CancelDestination); 1563 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 1564 } 1565 return; 1566 } 1567 } 1568 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 1569 } 1570 1571 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from 1572 /// the enum sched_type in kmp.h). 1573 enum OpenMPSchedType { 1574 /// \brief Lower bound for default (unordered) versions. 1575 OMP_sch_lower = 32, 1576 OMP_sch_static_chunked = 33, 1577 OMP_sch_static = 34, 1578 OMP_sch_dynamic_chunked = 35, 1579 OMP_sch_guided_chunked = 36, 1580 OMP_sch_runtime = 37, 1581 OMP_sch_auto = 38, 1582 /// \brief Lower bound for 'ordered' versions. 1583 OMP_ord_lower = 64, 1584 OMP_ord_static_chunked = 65, 1585 OMP_ord_static = 66, 1586 OMP_ord_dynamic_chunked = 67, 1587 OMP_ord_guided_chunked = 68, 1588 OMP_ord_runtime = 69, 1589 OMP_ord_auto = 70, 1590 OMP_sch_default = OMP_sch_static, 1591 }; 1592 1593 /// \brief Map the OpenMP loop schedule to the runtime enumeration. 1594 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 1595 bool Chunked, bool Ordered) { 1596 switch (ScheduleKind) { 1597 case OMPC_SCHEDULE_static: 1598 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 1599 : (Ordered ? OMP_ord_static : OMP_sch_static); 1600 case OMPC_SCHEDULE_dynamic: 1601 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 1602 case OMPC_SCHEDULE_guided: 1603 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 1604 case OMPC_SCHEDULE_runtime: 1605 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 1606 case OMPC_SCHEDULE_auto: 1607 return Ordered ? OMP_ord_auto : OMP_sch_auto; 1608 case OMPC_SCHEDULE_unknown: 1609 assert(!Chunked && "chunk was specified but schedule kind not known"); 1610 return Ordered ? OMP_ord_static : OMP_sch_static; 1611 } 1612 llvm_unreachable("Unexpected runtime schedule"); 1613 } 1614 1615 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 1616 bool Chunked) const { 1617 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 1618 return Schedule == OMP_sch_static; 1619 } 1620 1621 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 1622 auto Schedule = 1623 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 1624 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 1625 return Schedule != OMP_sch_static; 1626 } 1627 1628 void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc, 1629 OpenMPScheduleClauseKind ScheduleKind, 1630 unsigned IVSize, bool IVSigned, bool Ordered, 1631 llvm::Value *IL, llvm::Value *LB, 1632 llvm::Value *UB, llvm::Value *ST, 1633 llvm::Value *Chunk) { 1634 OpenMPSchedType Schedule = 1635 getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered); 1636 if (Ordered || 1637 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 1638 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked)) { 1639 // Call __kmpc_dispatch_init( 1640 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 1641 // kmp_int[32|64] lower, kmp_int[32|64] upper, 1642 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 1643 1644 // If the Chunk was not specified in the clause - use default value 1. 1645 if (Chunk == nullptr) 1646 Chunk = CGF.Builder.getIntN(IVSize, 1); 1647 llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1648 getThreadID(CGF, Loc), 1649 CGF.Builder.getInt32(Schedule), // Schedule type 1650 CGF.Builder.getIntN(IVSize, 0), // Lower 1651 UB, // Upper 1652 CGF.Builder.getIntN(IVSize, 1), // Stride 1653 Chunk // Chunk 1654 }; 1655 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 1656 } else { 1657 // Call __kmpc_for_static_init( 1658 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 1659 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 1660 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 1661 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 1662 if (Chunk == nullptr) { 1663 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) && 1664 "expected static non-chunked schedule"); 1665 // If the Chunk was not specified in the clause - use default value 1. 1666 Chunk = CGF.Builder.getIntN(IVSize, 1); 1667 } else 1668 assert((Schedule == OMP_sch_static_chunked || 1669 Schedule == OMP_ord_static_chunked) && 1670 "expected static chunked schedule"); 1671 llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1672 getThreadID(CGF, Loc), 1673 CGF.Builder.getInt32(Schedule), // Schedule type 1674 IL, // &isLastIter 1675 LB, // &LB 1676 UB, // &UB 1677 ST, // &Stride 1678 CGF.Builder.getIntN(IVSize, 1), // Incr 1679 Chunk // Chunk 1680 }; 1681 CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args); 1682 } 1683 } 1684 1685 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 1686 SourceLocation Loc) { 1687 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 1688 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1689 getThreadID(CGF, Loc)}; 1690 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 1691 Args); 1692 } 1693 1694 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 1695 SourceLocation Loc, 1696 unsigned IVSize, 1697 bool IVSigned) { 1698 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 1699 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1700 getThreadID(CGF, Loc)}; 1701 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 1702 } 1703 1704 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 1705 SourceLocation Loc, unsigned IVSize, 1706 bool IVSigned, llvm::Value *IL, 1707 llvm::Value *LB, llvm::Value *UB, 1708 llvm::Value *ST) { 1709 // Call __kmpc_dispatch_next( 1710 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 1711 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 1712 // kmp_int[32|64] *p_stride); 1713 llvm::Value *Args[] = { 1714 emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc), 1715 IL, // &isLastIter 1716 LB, // &Lower 1717 UB, // &Upper 1718 ST // &Stride 1719 }; 1720 llvm::Value *Call = 1721 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 1722 return CGF.EmitScalarConversion( 1723 Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true), 1724 CGF.getContext().BoolTy, Loc); 1725 } 1726 1727 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 1728 llvm::Value *NumThreads, 1729 SourceLocation Loc) { 1730 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 1731 llvm::Value *Args[] = { 1732 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1733 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 1734 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 1735 Args); 1736 } 1737 1738 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 1739 OpenMPProcBindClauseKind ProcBind, 1740 SourceLocation Loc) { 1741 // Constants for proc bind value accepted by the runtime. 1742 enum ProcBindTy { 1743 ProcBindFalse = 0, 1744 ProcBindTrue, 1745 ProcBindMaster, 1746 ProcBindClose, 1747 ProcBindSpread, 1748 ProcBindIntel, 1749 ProcBindDefault 1750 } RuntimeProcBind; 1751 switch (ProcBind) { 1752 case OMPC_PROC_BIND_master: 1753 RuntimeProcBind = ProcBindMaster; 1754 break; 1755 case OMPC_PROC_BIND_close: 1756 RuntimeProcBind = ProcBindClose; 1757 break; 1758 case OMPC_PROC_BIND_spread: 1759 RuntimeProcBind = ProcBindSpread; 1760 break; 1761 case OMPC_PROC_BIND_unknown: 1762 llvm_unreachable("Unsupported proc_bind value."); 1763 } 1764 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 1765 llvm::Value *Args[] = { 1766 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1767 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 1768 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 1769 } 1770 1771 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 1772 SourceLocation Loc) { 1773 // Build call void __kmpc_flush(ident_t *loc) 1774 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 1775 emitUpdateLocation(CGF, Loc)); 1776 } 1777 1778 namespace { 1779 /// \brief Indexes of fields for type kmp_task_t. 1780 enum KmpTaskTFields { 1781 /// \brief List of shared variables. 1782 KmpTaskTShareds, 1783 /// \brief Task routine. 1784 KmpTaskTRoutine, 1785 /// \brief Partition id for the untied tasks. 1786 KmpTaskTPartId, 1787 /// \brief Function with call of destructors for private variables. 1788 KmpTaskTDestructors, 1789 }; 1790 } // namespace 1791 1792 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 1793 if (!KmpRoutineEntryPtrTy) { 1794 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 1795 auto &C = CGM.getContext(); 1796 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 1797 FunctionProtoType::ExtProtoInfo EPI; 1798 KmpRoutineEntryPtrQTy = C.getPointerType( 1799 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 1800 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 1801 } 1802 } 1803 1804 static void addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1805 QualType FieldTy) { 1806 auto *Field = FieldDecl::Create( 1807 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1808 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1809 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1810 Field->setAccess(AS_public); 1811 DC->addDecl(Field); 1812 } 1813 1814 namespace { 1815 struct PrivateHelpersTy { 1816 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 1817 const VarDecl *PrivateElemInit) 1818 : Original(Original), PrivateCopy(PrivateCopy), 1819 PrivateElemInit(PrivateElemInit) {} 1820 const VarDecl *Original; 1821 const VarDecl *PrivateCopy; 1822 const VarDecl *PrivateElemInit; 1823 }; 1824 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 1825 } // namespace 1826 1827 static RecordDecl * 1828 createPrivatesRecordDecl(CodeGenModule &CGM, 1829 const ArrayRef<PrivateDataTy> Privates) { 1830 if (!Privates.empty()) { 1831 auto &C = CGM.getContext(); 1832 // Build struct .kmp_privates_t. { 1833 // /* private vars */ 1834 // }; 1835 auto *RD = C.buildImplicitRecord(".kmp_privates.t"); 1836 RD->startDefinition(); 1837 for (auto &&Pair : Privates) { 1838 auto Type = Pair.second.Original->getType(); 1839 Type = Type.getNonReferenceType(); 1840 addFieldToRecordDecl(C, RD, Type); 1841 } 1842 RD->completeDefinition(); 1843 return RD; 1844 } 1845 return nullptr; 1846 } 1847 1848 static RecordDecl * 1849 createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty, 1850 QualType KmpRoutineEntryPointerQTy) { 1851 auto &C = CGM.getContext(); 1852 // Build struct kmp_task_t { 1853 // void * shareds; 1854 // kmp_routine_entry_t routine; 1855 // kmp_int32 part_id; 1856 // kmp_routine_entry_t destructors; 1857 // }; 1858 auto *RD = C.buildImplicitRecord("kmp_task_t"); 1859 RD->startDefinition(); 1860 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1861 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 1862 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1863 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 1864 RD->completeDefinition(); 1865 return RD; 1866 } 1867 1868 static RecordDecl * 1869 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 1870 const ArrayRef<PrivateDataTy> Privates) { 1871 auto &C = CGM.getContext(); 1872 // Build struct kmp_task_t_with_privates { 1873 // kmp_task_t task_data; 1874 // .kmp_privates_t. privates; 1875 // }; 1876 auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 1877 RD->startDefinition(); 1878 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 1879 if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) { 1880 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 1881 } 1882 RD->completeDefinition(); 1883 return RD; 1884 } 1885 1886 /// \brief Emit a proxy function which accepts kmp_task_t as the second 1887 /// argument. 1888 /// \code 1889 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 1890 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, 1891 /// tt->shareds); 1892 /// return 0; 1893 /// } 1894 /// \endcode 1895 static llvm::Value * 1896 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 1897 QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, 1898 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 1899 QualType SharedsPtrTy, llvm::Value *TaskFunction, 1900 llvm::Value *TaskPrivatesMap) { 1901 auto &C = CGM.getContext(); 1902 FunctionArgList Args; 1903 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); 1904 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, 1905 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); 1906 Args.push_back(&GtidArg); 1907 Args.push_back(&TaskTypeArg); 1908 FunctionType::ExtInfo Info; 1909 auto &TaskEntryFnInfo = 1910 CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info, 1911 /*isVariadic=*/false); 1912 auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 1913 auto *TaskEntry = 1914 llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage, 1915 ".omp_task_entry.", &CGM.getModule()); 1916 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry); 1917 CodeGenFunction CGF(CGM); 1918 CGF.disableDebugInfo(); 1919 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); 1920 1921 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 1922 // tt->task_data.shareds); 1923 auto *GtidParam = CGF.EmitLoadOfScalar( 1924 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, 1925 C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc); 1926 auto *TaskTypeArgAddr = CGF.Builder.CreateAlignedLoad( 1927 CGF.GetAddrOfLocalVar(&TaskTypeArg), CGM.PointerAlignInBytes); 1928 LValue TDBase = 1929 CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy); 1930 auto *KmpTaskTWithPrivatesQTyRD = 1931 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 1932 LValue Base = 1933 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 1934 auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 1935 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 1936 auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 1937 auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal(); 1938 1939 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 1940 auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 1941 auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1942 CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(), 1943 CGF.ConvertTypeForMem(SharedsPtrTy)); 1944 1945 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 1946 llvm::Value *PrivatesParam; 1947 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 1948 auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 1949 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1950 PrivatesLVal.getAddress(), CGF.VoidPtrTy); 1951 } else { 1952 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 1953 } 1954 1955 llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam, 1956 TaskPrivatesMap, SharedsParam}; 1957 CGF.EmitCallOrInvoke(TaskFunction, CallArgs); 1958 CGF.EmitStoreThroughLValue( 1959 RValue::get(CGF.Builder.getInt32(/*C=*/0)), 1960 CGF.MakeNaturalAlignAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 1961 CGF.FinishFunction(); 1962 return TaskEntry; 1963 } 1964 1965 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 1966 SourceLocation Loc, 1967 QualType KmpInt32Ty, 1968 QualType KmpTaskTWithPrivatesPtrQTy, 1969 QualType KmpTaskTWithPrivatesQTy) { 1970 auto &C = CGM.getContext(); 1971 FunctionArgList Args; 1972 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); 1973 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, 1974 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); 1975 Args.push_back(&GtidArg); 1976 Args.push_back(&TaskTypeArg); 1977 FunctionType::ExtInfo Info; 1978 auto &DestructorFnInfo = 1979 CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info, 1980 /*isVariadic=*/false); 1981 auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo); 1982 auto *DestructorFn = 1983 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 1984 ".omp_task_destructor.", &CGM.getModule()); 1985 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, DestructorFnInfo, DestructorFn); 1986 CodeGenFunction CGF(CGM); 1987 CGF.disableDebugInfo(); 1988 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 1989 Args); 1990 1991 auto *TaskTypeArgAddr = CGF.Builder.CreateAlignedLoad( 1992 CGF.GetAddrOfLocalVar(&TaskTypeArg), CGM.PointerAlignInBytes); 1993 LValue Base = 1994 CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy); 1995 auto *KmpTaskTWithPrivatesQTyRD = 1996 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 1997 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 1998 Base = CGF.EmitLValueForField(Base, *FI); 1999 for (auto *Field : 2000 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 2001 if (auto DtorKind = Field->getType().isDestructedType()) { 2002 auto FieldLValue = CGF.EmitLValueForField(Base, Field); 2003 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 2004 } 2005 } 2006 CGF.FinishFunction(); 2007 return DestructorFn; 2008 } 2009 2010 /// \brief Emit a privates mapping function for correct handling of private and 2011 /// firstprivate variables. 2012 /// \code 2013 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 2014 /// **noalias priv1,..., <tyn> **noalias privn) { 2015 /// *priv1 = &.privates.priv1; 2016 /// ...; 2017 /// *privn = &.privates.privn; 2018 /// } 2019 /// \endcode 2020 static llvm::Value * 2021 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 2022 const ArrayRef<const Expr *> PrivateVars, 2023 const ArrayRef<const Expr *> FirstprivateVars, 2024 QualType PrivatesQTy, 2025 const ArrayRef<PrivateDataTy> Privates) { 2026 auto &C = CGM.getContext(); 2027 FunctionArgList Args; 2028 ImplicitParamDecl TaskPrivatesArg( 2029 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 2030 C.getPointerType(PrivatesQTy).withConst().withRestrict()); 2031 Args.push_back(&TaskPrivatesArg); 2032 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 2033 unsigned Counter = 1; 2034 for (auto *E: PrivateVars) { 2035 Args.push_back(ImplicitParamDecl::Create( 2036 C, /*DC=*/nullptr, Loc, 2037 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 2038 .withConst() 2039 .withRestrict())); 2040 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2041 PrivateVarsPos[VD] = Counter; 2042 ++Counter; 2043 } 2044 for (auto *E : FirstprivateVars) { 2045 Args.push_back(ImplicitParamDecl::Create( 2046 C, /*DC=*/nullptr, Loc, 2047 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 2048 .withConst() 2049 .withRestrict())); 2050 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2051 PrivateVarsPos[VD] = Counter; 2052 ++Counter; 2053 } 2054 FunctionType::ExtInfo Info; 2055 auto &TaskPrivatesMapFnInfo = 2056 CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info, 2057 /*isVariadic=*/false); 2058 auto *TaskPrivatesMapTy = 2059 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 2060 auto *TaskPrivatesMap = llvm::Function::Create( 2061 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, 2062 ".omp_task_privates_map.", &CGM.getModule()); 2063 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskPrivatesMapFnInfo, 2064 TaskPrivatesMap); 2065 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 2066 CodeGenFunction CGF(CGM); 2067 CGF.disableDebugInfo(); 2068 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 2069 TaskPrivatesMapFnInfo, Args); 2070 2071 // *privi = &.privates.privi; 2072 auto *TaskPrivatesArgAddr = CGF.Builder.CreateAlignedLoad( 2073 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), CGM.PointerAlignInBytes); 2074 LValue Base = 2075 CGF.MakeNaturalAlignAddrLValue(TaskPrivatesArgAddr, PrivatesQTy); 2076 auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 2077 Counter = 0; 2078 for (auto *Field : PrivatesQTyRD->fields()) { 2079 auto FieldLVal = CGF.EmitLValueForField(Base, Field); 2080 auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 2081 auto RefLVal = CGF.MakeNaturalAlignAddrLValue(CGF.GetAddrOfLocalVar(VD), 2082 VD->getType()); 2083 auto RefLoadRVal = CGF.EmitLoadOfLValue(RefLVal, Loc); 2084 CGF.EmitStoreOfScalar( 2085 FieldLVal.getAddress(), 2086 CGF.MakeNaturalAlignAddrLValue(RefLoadRVal.getScalarVal(), 2087 RefLVal.getType()->getPointeeType())); 2088 ++Counter; 2089 } 2090 CGF.FinishFunction(); 2091 return TaskPrivatesMap; 2092 } 2093 2094 static int array_pod_sort_comparator(const PrivateDataTy *P1, 2095 const PrivateDataTy *P2) { 2096 return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0); 2097 } 2098 2099 void CGOpenMPRuntime::emitTaskCall( 2100 CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, 2101 bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final, 2102 llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds, 2103 const Expr *IfCond, ArrayRef<const Expr *> PrivateVars, 2104 ArrayRef<const Expr *> PrivateCopies, 2105 ArrayRef<const Expr *> FirstprivateVars, 2106 ArrayRef<const Expr *> FirstprivateCopies, 2107 ArrayRef<const Expr *> FirstprivateInits, 2108 ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) { 2109 auto &C = CGM.getContext(); 2110 llvm::SmallVector<PrivateDataTy, 8> Privates; 2111 // Aggregate privates and sort them by the alignment. 2112 auto I = PrivateCopies.begin(); 2113 for (auto *E : PrivateVars) { 2114 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2115 Privates.push_back(std::make_pair( 2116 C.getTypeAlignInChars(VD->getType()), 2117 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 2118 /*PrivateElemInit=*/nullptr))); 2119 ++I; 2120 } 2121 I = FirstprivateCopies.begin(); 2122 auto IElemInitRef = FirstprivateInits.begin(); 2123 for (auto *E : FirstprivateVars) { 2124 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2125 Privates.push_back(std::make_pair( 2126 C.getTypeAlignInChars(VD->getType()), 2127 PrivateHelpersTy( 2128 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 2129 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())))); 2130 ++I, ++IElemInitRef; 2131 } 2132 llvm::array_pod_sort(Privates.begin(), Privates.end(), 2133 array_pod_sort_comparator); 2134 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2135 // Build type kmp_routine_entry_t (if not built yet). 2136 emitKmpRoutineEntryT(KmpInt32Ty); 2137 // Build type kmp_task_t (if not built yet). 2138 if (KmpTaskTQTy.isNull()) { 2139 KmpTaskTQTy = C.getRecordType( 2140 createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy)); 2141 } 2142 auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 2143 // Build particular struct kmp_task_t for the given task. 2144 auto *KmpTaskTWithPrivatesQTyRD = 2145 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 2146 auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 2147 QualType KmpTaskTWithPrivatesPtrQTy = 2148 C.getPointerType(KmpTaskTWithPrivatesQTy); 2149 auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 2150 auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo(); 2151 auto KmpTaskTWithPrivatesTySize = 2152 CGM.getSize(C.getTypeSizeInChars(KmpTaskTWithPrivatesQTy)); 2153 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 2154 2155 // Emit initial values for private copies (if any). 2156 llvm::Value *TaskPrivatesMap = nullptr; 2157 auto *TaskPrivatesMapTy = 2158 std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(), 2159 3) 2160 ->getType(); 2161 if (!Privates.empty()) { 2162 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 2163 TaskPrivatesMap = emitTaskPrivateMappingFunction( 2164 CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates); 2165 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2166 TaskPrivatesMap, TaskPrivatesMapTy); 2167 } else { 2168 TaskPrivatesMap = llvm::ConstantPointerNull::get( 2169 cast<llvm::PointerType>(TaskPrivatesMapTy)); 2170 } 2171 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 2172 // kmp_task_t *tt); 2173 auto *TaskEntry = emitProxyTaskFunction( 2174 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy, 2175 KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap); 2176 2177 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 2178 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2179 // kmp_routine_entry_t *task_entry); 2180 // Task flags. Format is taken from 2181 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h, 2182 // description of kmp_tasking_flags struct. 2183 const unsigned TiedFlag = 0x1; 2184 const unsigned FinalFlag = 0x2; 2185 unsigned Flags = Tied ? TiedFlag : 0; 2186 auto *TaskFlags = 2187 Final.getPointer() 2188 ? CGF.Builder.CreateSelect(Final.getPointer(), 2189 CGF.Builder.getInt32(FinalFlag), 2190 CGF.Builder.getInt32(/*C=*/0)) 2191 : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0); 2192 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 2193 auto SharedsSize = C.getTypeSizeInChars(SharedsTy); 2194 llvm::Value *AllocArgs[] = { 2195 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags, 2196 KmpTaskTWithPrivatesTySize, CGM.getSize(SharedsSize), 2197 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry, 2198 KmpRoutineEntryPtrTy)}; 2199 auto *NewTask = CGF.EmitRuntimeCall( 2200 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 2201 auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2202 NewTask, KmpTaskTWithPrivatesPtrTy); 2203 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 2204 KmpTaskTWithPrivatesQTy); 2205 LValue TDBase = 2206 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 2207 // Fill the data in the resulting kmp_task_t record. 2208 // Copy shareds if there are any. 2209 llvm::Value *KmpTaskSharedsPtr = nullptr; 2210 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 2211 KmpTaskSharedsPtr = CGF.EmitLoadOfScalar( 2212 CGF.EmitLValueForField( 2213 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)), 2214 Loc); 2215 CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy); 2216 } 2217 // Emit initial values for private copies (if any). 2218 bool NeedsCleanup = false; 2219 if (!Privates.empty()) { 2220 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 2221 auto PrivatesBase = CGF.EmitLValueForField(Base, *FI); 2222 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 2223 LValue SharedsBase; 2224 if (!FirstprivateVars.empty()) { 2225 SharedsBase = CGF.MakeNaturalAlignAddrLValue( 2226 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2227 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 2228 SharedsTy); 2229 } 2230 CodeGenFunction::CGCapturedStmtInfo CapturesInfo( 2231 cast<CapturedStmt>(*D.getAssociatedStmt())); 2232 for (auto &&Pair : Privates) { 2233 auto *VD = Pair.second.PrivateCopy; 2234 auto *Init = VD->getAnyInitializer(); 2235 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 2236 if (Init) { 2237 if (auto *Elem = Pair.second.PrivateElemInit) { 2238 auto *OriginalVD = Pair.second.Original; 2239 auto *SharedField = CapturesInfo.lookup(OriginalVD); 2240 auto SharedRefLValue = 2241 CGF.EmitLValueForField(SharedsBase, SharedField); 2242 QualType Type = OriginalVD->getType(); 2243 if (Type->isArrayType()) { 2244 // Initialize firstprivate array. 2245 if (!isa<CXXConstructExpr>(Init) || 2246 CGF.isTrivialInitializer(Init)) { 2247 // Perform simple memcpy. 2248 CGF.EmitAggregateAssign(PrivateLValue.getAddress(), 2249 SharedRefLValue.getAddress(), Type); 2250 } else { 2251 // Initialize firstprivate array using element-by-element 2252 // intialization. 2253 CGF.EmitOMPAggregateAssign( 2254 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), 2255 Type, [&CGF, Elem, Init, &CapturesInfo]( 2256 llvm::Value *DestElement, llvm::Value *SrcElement) { 2257 // Clean up any temporaries needed by the initialization. 2258 CodeGenFunction::OMPPrivateScope InitScope(CGF); 2259 InitScope.addPrivate(Elem, [SrcElement]() -> llvm::Value *{ 2260 return SrcElement; 2261 }); 2262 (void)InitScope.Privatize(); 2263 // Emit initialization for single element. 2264 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 2265 CGF, &CapturesInfo); 2266 CGF.EmitAnyExprToMem(Init, DestElement, 2267 Init->getType().getQualifiers(), 2268 /*IsInitializer=*/false); 2269 }); 2270 } 2271 } else { 2272 CodeGenFunction::OMPPrivateScope InitScope(CGF); 2273 InitScope.addPrivate(Elem, [SharedRefLValue]() -> llvm::Value *{ 2274 return SharedRefLValue.getAddress(); 2275 }); 2276 (void)InitScope.Privatize(); 2277 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 2278 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 2279 /*capturedByInit=*/false); 2280 } 2281 } else { 2282 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 2283 } 2284 } 2285 NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType(); 2286 ++FI; 2287 } 2288 } 2289 // Provide pointer to function with destructors for privates. 2290 llvm::Value *DestructorFn = 2291 NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty, 2292 KmpTaskTWithPrivatesPtrQTy, 2293 KmpTaskTWithPrivatesQTy) 2294 : llvm::ConstantPointerNull::get( 2295 cast<llvm::PointerType>(KmpRoutineEntryPtrTy)); 2296 LValue Destructor = CGF.EmitLValueForField( 2297 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors)); 2298 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2299 DestructorFn, KmpRoutineEntryPtrTy), 2300 Destructor); 2301 2302 // Process list of dependences. 2303 llvm::Value *DependInfo = nullptr; 2304 unsigned DependencesNumber = Dependences.size(); 2305 if (!Dependences.empty()) { 2306 // Dependence kind for RTL. 2307 enum RTLDependenceKindTy { DepIn = 1, DepOut = 2, DepInOut = 3 }; 2308 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 2309 RecordDecl *KmpDependInfoRD; 2310 QualType FlagsTy = C.getIntTypeForBitwidth( 2311 C.toBits(C.getTypeSizeInChars(C.BoolTy)), /*Signed=*/false); 2312 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 2313 if (KmpDependInfoTy.isNull()) { 2314 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 2315 KmpDependInfoRD->startDefinition(); 2316 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 2317 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 2318 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 2319 KmpDependInfoRD->completeDefinition(); 2320 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 2321 } else { 2322 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 2323 } 2324 // Define type kmp_depend_info[<Dependences.size()>]; 2325 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 2326 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, Dependences.size()), 2327 ArrayType::Normal, /*IndexTypeQuals=*/0); 2328 // kmp_depend_info[<Dependences.size()>] deps; 2329 DependInfo = CGF.CreateMemTemp(KmpDependInfoArrayTy); 2330 for (unsigned i = 0; i < DependencesNumber; ++i) { 2331 auto Addr = CGF.EmitLValue(Dependences[i].second); 2332 auto *Size = llvm::ConstantInt::get( 2333 CGF.SizeTy, 2334 C.getTypeSizeInChars(Dependences[i].second->getType()).getQuantity()); 2335 auto Base = CGF.MakeNaturalAlignAddrLValue( 2336 CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, DependInfo, i), 2337 KmpDependInfoTy); 2338 // deps[i].base_addr = &<Dependences[i].second>; 2339 auto BaseAddrLVal = CGF.EmitLValueForField( 2340 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 2341 CGF.EmitStoreOfScalar( 2342 CGF.Builder.CreatePtrToInt(Addr.getAddress(), CGF.IntPtrTy), 2343 BaseAddrLVal); 2344 // deps[i].len = sizeof(<Dependences[i].second>); 2345 auto LenLVal = CGF.EmitLValueForField( 2346 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 2347 CGF.EmitStoreOfScalar(Size, LenLVal); 2348 // deps[i].flags = <Dependences[i].first>; 2349 RTLDependenceKindTy DepKind; 2350 switch (Dependences[i].first) { 2351 case OMPC_DEPEND_in: 2352 DepKind = DepIn; 2353 break; 2354 case OMPC_DEPEND_out: 2355 DepKind = DepOut; 2356 break; 2357 case OMPC_DEPEND_inout: 2358 DepKind = DepInOut; 2359 break; 2360 case OMPC_DEPEND_unknown: 2361 llvm_unreachable("Unknown task dependence type"); 2362 } 2363 auto FlagsLVal = CGF.EmitLValueForField( 2364 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 2365 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 2366 FlagsLVal); 2367 } 2368 DependInfo = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2369 CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, DependInfo, 0), 2370 CGF.VoidPtrTy); 2371 } 2372 2373 // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() 2374 // libcall. 2375 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2376 // *new_task); 2377 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2378 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2379 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 2380 // list is not empty 2381 auto *ThreadID = getThreadID(CGF, Loc); 2382 auto *UpLoc = emitUpdateLocation(CGF, Loc); 2383 llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask}; 2384 llvm::Value *DepTaskArgs[] = { 2385 UpLoc, 2386 ThreadID, 2387 NewTask, 2388 DependInfo ? CGF.Builder.getInt32(DependencesNumber) : nullptr, 2389 DependInfo, 2390 DependInfo ? CGF.Builder.getInt32(0) : nullptr, 2391 DependInfo ? llvm::ConstantPointerNull::get(CGF.VoidPtrTy) : nullptr}; 2392 auto &&ThenCodeGen = [this, DependInfo, &TaskArgs, 2393 &DepTaskArgs](CodeGenFunction &CGF) { 2394 // TODO: add check for untied tasks. 2395 CGF.EmitRuntimeCall( 2396 createRuntimeFunction(DependInfo ? OMPRTL__kmpc_omp_task_with_deps 2397 : OMPRTL__kmpc_omp_task), 2398 DependInfo ? makeArrayRef(DepTaskArgs) : makeArrayRef(TaskArgs)); 2399 }; 2400 typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value> 2401 IfCallEndCleanup; 2402 llvm::Value *DepWaitTaskArgs[] = { 2403 UpLoc, 2404 ThreadID, 2405 DependInfo ? CGF.Builder.getInt32(DependencesNumber) : nullptr, 2406 DependInfo, 2407 DependInfo ? CGF.Builder.getInt32(0) : nullptr, 2408 DependInfo ? llvm::ConstantPointerNull::get(CGF.VoidPtrTy) : nullptr}; 2409 auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 2410 DependInfo, &DepWaitTaskArgs](CodeGenFunction &CGF) { 2411 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 2412 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2413 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 2414 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 2415 // is specified. 2416 if (DependInfo) 2417 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), 2418 DepWaitTaskArgs); 2419 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 2420 // kmp_task_t *new_task); 2421 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), 2422 TaskArgs); 2423 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 2424 // kmp_task_t *new_task); 2425 CGF.EHStack.pushCleanup<IfCallEndCleanup>( 2426 NormalAndEHCleanup, 2427 createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), 2428 llvm::makeArrayRef(TaskArgs)); 2429 2430 // Call proxy_task_entry(gtid, new_task); 2431 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 2432 CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); 2433 }; 2434 if (IfCond) { 2435 emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 2436 } else { 2437 CodeGenFunction::RunCleanupsScope Scope(CGF); 2438 ThenCodeGen(CGF); 2439 } 2440 } 2441 2442 static llvm::Value *emitReductionFunction(CodeGenModule &CGM, 2443 llvm::Type *ArgsType, 2444 ArrayRef<const Expr *> LHSExprs, 2445 ArrayRef<const Expr *> RHSExprs, 2446 ArrayRef<const Expr *> ReductionOps) { 2447 auto &C = CGM.getContext(); 2448 2449 // void reduction_func(void *LHSArg, void *RHSArg); 2450 FunctionArgList Args; 2451 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 2452 C.VoidPtrTy); 2453 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 2454 C.VoidPtrTy); 2455 Args.push_back(&LHSArg); 2456 Args.push_back(&RHSArg); 2457 FunctionType::ExtInfo EI; 2458 auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration( 2459 C.VoidTy, Args, EI, /*isVariadic=*/false); 2460 auto *Fn = llvm::Function::Create( 2461 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 2462 ".omp.reduction.reduction_func", &CGM.getModule()); 2463 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn); 2464 CodeGenFunction CGF(CGM); 2465 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 2466 2467 // Dst = (void*[n])(LHSArg); 2468 // Src = (void*[n])(RHSArg); 2469 auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2470 CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg), 2471 CGF.PointerAlignInBytes), 2472 ArgsType); 2473 auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2474 CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg), 2475 CGF.PointerAlignInBytes), 2476 ArgsType); 2477 2478 // ... 2479 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 2480 // ... 2481 CodeGenFunction::OMPPrivateScope Scope(CGF); 2482 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I) { 2483 Scope.addPrivate( 2484 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()), 2485 [&]() -> llvm::Value *{ 2486 return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2487 CGF.Builder.CreateAlignedLoad( 2488 CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, RHS, I), 2489 CGM.PointerAlignInBytes), 2490 CGF.ConvertTypeForMem(C.getPointerType(RHSExprs[I]->getType()))); 2491 }); 2492 Scope.addPrivate( 2493 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()), 2494 [&]() -> llvm::Value *{ 2495 return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2496 CGF.Builder.CreateAlignedLoad( 2497 CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, LHS, I), 2498 CGM.PointerAlignInBytes), 2499 CGF.ConvertTypeForMem(C.getPointerType(LHSExprs[I]->getType()))); 2500 }); 2501 } 2502 Scope.Privatize(); 2503 for (auto *E : ReductionOps) { 2504 CGF.EmitIgnoredExpr(E); 2505 } 2506 Scope.ForceCleanup(); 2507 CGF.FinishFunction(); 2508 return Fn; 2509 } 2510 2511 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 2512 ArrayRef<const Expr *> LHSExprs, 2513 ArrayRef<const Expr *> RHSExprs, 2514 ArrayRef<const Expr *> ReductionOps, 2515 bool WithNowait, bool SimpleReduction) { 2516 // Next code should be emitted for reduction: 2517 // 2518 // static kmp_critical_name lock = { 0 }; 2519 // 2520 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 2521 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 2522 // ... 2523 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 2524 // *(Type<n>-1*)rhs[<n>-1]); 2525 // } 2526 // 2527 // ... 2528 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 2529 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 2530 // RedList, reduce_func, &<lock>)) { 2531 // case 1: 2532 // ... 2533 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 2534 // ... 2535 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 2536 // break; 2537 // case 2: 2538 // ... 2539 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 2540 // ... 2541 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 2542 // break; 2543 // default:; 2544 // } 2545 // 2546 // if SimpleReduction is true, only the next code is generated: 2547 // ... 2548 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 2549 // ... 2550 2551 auto &C = CGM.getContext(); 2552 2553 if (SimpleReduction) { 2554 CodeGenFunction::RunCleanupsScope Scope(CGF); 2555 for (auto *E : ReductionOps) { 2556 CGF.EmitIgnoredExpr(E); 2557 } 2558 return; 2559 } 2560 2561 // 1. Build a list of reduction variables. 2562 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 2563 llvm::APInt ArraySize(/*unsigned int numBits=*/32, RHSExprs.size()); 2564 QualType ReductionArrayTy = 2565 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 2566 /*IndexTypeQuals=*/0); 2567 auto *ReductionList = 2568 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 2569 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I) { 2570 auto *Elem = CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, ReductionList, I); 2571 CGF.Builder.CreateAlignedStore( 2572 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2573 CGF.EmitLValue(RHSExprs[I]).getAddress(), CGF.VoidPtrTy), 2574 Elem, CGM.PointerAlignInBytes); 2575 } 2576 2577 // 2. Emit reduce_func(). 2578 auto *ReductionFn = emitReductionFunction( 2579 CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), LHSExprs, 2580 RHSExprs, ReductionOps); 2581 2582 // 3. Create static kmp_critical_name lock = { 0 }; 2583 auto *Lock = getCriticalRegionLock(".reduction"); 2584 2585 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 2586 // RedList, reduce_func, &<lock>); 2587 auto *IdentTLoc = emitUpdateLocation( 2588 CGF, Loc, 2589 static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE)); 2590 auto *ThreadId = getThreadID(CGF, Loc); 2591 auto *ReductionArrayTySize = llvm::ConstantInt::get( 2592 CGM.SizeTy, C.getTypeSizeInChars(ReductionArrayTy).getQuantity()); 2593 auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, 2594 CGF.VoidPtrTy); 2595 llvm::Value *Args[] = { 2596 IdentTLoc, // ident_t *<loc> 2597 ThreadId, // i32 <gtid> 2598 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 2599 ReductionArrayTySize, // size_type sizeof(RedList) 2600 RL, // void *RedList 2601 ReductionFn, // void (*) (void *, void *) <reduce_func> 2602 Lock // kmp_critical_name *&<lock> 2603 }; 2604 auto Res = CGF.EmitRuntimeCall( 2605 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 2606 : OMPRTL__kmpc_reduce), 2607 Args); 2608 2609 // 5. Build switch(res) 2610 auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 2611 auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 2612 2613 // 6. Build case 1: 2614 // ... 2615 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 2616 // ... 2617 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 2618 // break; 2619 auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 2620 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 2621 CGF.EmitBlock(Case1BB); 2622 2623 { 2624 CodeGenFunction::RunCleanupsScope Scope(CGF); 2625 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 2626 llvm::Value *EndArgs[] = { 2627 IdentTLoc, // ident_t *<loc> 2628 ThreadId, // i32 <gtid> 2629 Lock // kmp_critical_name *&<lock> 2630 }; 2631 CGF.EHStack 2632 .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>( 2633 NormalAndEHCleanup, 2634 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait 2635 : OMPRTL__kmpc_end_reduce), 2636 llvm::makeArrayRef(EndArgs)); 2637 for (auto *E : ReductionOps) { 2638 CGF.EmitIgnoredExpr(E); 2639 } 2640 } 2641 2642 CGF.EmitBranch(DefaultBB); 2643 2644 // 7. Build case 2: 2645 // ... 2646 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 2647 // ... 2648 // break; 2649 auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 2650 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 2651 CGF.EmitBlock(Case2BB); 2652 2653 { 2654 CodeGenFunction::RunCleanupsScope Scope(CGF); 2655 if (!WithNowait) { 2656 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 2657 llvm::Value *EndArgs[] = { 2658 IdentTLoc, // ident_t *<loc> 2659 ThreadId, // i32 <gtid> 2660 Lock // kmp_critical_name *&<lock> 2661 }; 2662 CGF.EHStack 2663 .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>( 2664 NormalAndEHCleanup, 2665 createRuntimeFunction(OMPRTL__kmpc_end_reduce), 2666 llvm::makeArrayRef(EndArgs)); 2667 } 2668 auto I = LHSExprs.begin(); 2669 for (auto *E : ReductionOps) { 2670 const Expr *XExpr = nullptr; 2671 const Expr *EExpr = nullptr; 2672 const Expr *UpExpr = nullptr; 2673 BinaryOperatorKind BO = BO_Comma; 2674 if (auto *BO = dyn_cast<BinaryOperator>(E)) { 2675 if (BO->getOpcode() == BO_Assign) { 2676 XExpr = BO->getLHS(); 2677 UpExpr = BO->getRHS(); 2678 } 2679 } 2680 // Try to emit update expression as a simple atomic. 2681 auto *RHSExpr = UpExpr; 2682 if (RHSExpr) { 2683 // Analyze RHS part of the whole expression. 2684 if (auto *ACO = dyn_cast<AbstractConditionalOperator>( 2685 RHSExpr->IgnoreParenImpCasts())) { 2686 // If this is a conditional operator, analyze its condition for 2687 // min/max reduction operator. 2688 RHSExpr = ACO->getCond(); 2689 } 2690 if (auto *BORHS = 2691 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 2692 EExpr = BORHS->getRHS(); 2693 BO = BORHS->getOpcode(); 2694 } 2695 } 2696 if (XExpr) { 2697 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); 2698 LValue X = CGF.EmitLValue(XExpr); 2699 RValue E; 2700 if (EExpr) 2701 E = CGF.EmitAnyExpr(EExpr); 2702 CGF.EmitOMPAtomicSimpleUpdateExpr( 2703 X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc, 2704 [&CGF, UpExpr, VD](RValue XRValue) { 2705 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 2706 PrivateScope.addPrivate( 2707 VD, [&CGF, VD, XRValue]() -> llvm::Value *{ 2708 auto *LHSTemp = CGF.CreateMemTemp(VD->getType()); 2709 CGF.EmitStoreThroughLValue( 2710 XRValue, 2711 CGF.MakeNaturalAlignAddrLValue(LHSTemp, VD->getType())); 2712 return LHSTemp; 2713 }); 2714 (void)PrivateScope.Privatize(); 2715 return CGF.EmitAnyExpr(UpExpr); 2716 }); 2717 } else { 2718 // Emit as a critical region. 2719 emitCriticalRegion(CGF, ".atomic_reduction", [E](CodeGenFunction &CGF) { 2720 CGF.EmitIgnoredExpr(E); 2721 }, Loc); 2722 } 2723 ++I; 2724 } 2725 } 2726 2727 CGF.EmitBranch(DefaultBB); 2728 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 2729 } 2730 2731 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 2732 SourceLocation Loc) { 2733 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 2734 // global_tid); 2735 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2736 // Ignore return result until untied tasks are supported. 2737 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 2738 } 2739 2740 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 2741 OpenMPDirectiveKind InnerKind, 2742 const RegionCodeGenTy &CodeGen) { 2743 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind); 2744 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 2745 } 2746 2747 namespace { 2748 enum RTCancelKind { 2749 CancelNoreq = 0, 2750 CancelParallel = 1, 2751 CancelLoop = 2, 2752 CancelSections = 3, 2753 CancelTaskgroup = 4 2754 }; 2755 } 2756 2757 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 2758 RTCancelKind CancelKind = CancelNoreq; 2759 if (CancelRegion == OMPD_parallel) 2760 CancelKind = CancelParallel; 2761 else if (CancelRegion == OMPD_for) 2762 CancelKind = CancelLoop; 2763 else if (CancelRegion == OMPD_sections) 2764 CancelKind = CancelSections; 2765 else { 2766 assert(CancelRegion == OMPD_taskgroup); 2767 CancelKind = CancelTaskgroup; 2768 } 2769 return CancelKind; 2770 } 2771 2772 void CGOpenMPRuntime::emitCancellationPointCall( 2773 CodeGenFunction &CGF, SourceLocation Loc, 2774 OpenMPDirectiveKind CancelRegion) { 2775 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2776 // global_tid, kmp_int32 cncl_kind); 2777 if (auto *OMPRegionInfo = 2778 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 2779 auto CancelDest = 2780 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2781 if (CancelDest.isValid()) { 2782 llvm::Value *Args[] = { 2783 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2784 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 2785 // Ignore return result until untied tasks are supported. 2786 auto *Result = CGF.EmitRuntimeCall( 2787 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 2788 // if (__kmpc_cancellationpoint()) { 2789 // __kmpc_cancel_barrier(); 2790 // exit from construct; 2791 // } 2792 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2793 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 2794 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 2795 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2796 CGF.EmitBlock(ExitBB); 2797 // __kmpc_cancel_barrier(); 2798 emitBarrierCall(CGF, Loc, OMPD_unknown, /*CheckForCancel=*/false); 2799 // exit from construct; 2800 CGF.EmitBranchThroughCleanup(CancelDest); 2801 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2802 } 2803 } 2804 } 2805 2806 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 2807 OpenMPDirectiveKind CancelRegion) { 2808 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2809 // kmp_int32 cncl_kind); 2810 if (auto *OMPRegionInfo = 2811 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 2812 auto CancelDest = 2813 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2814 if (CancelDest.isValid()) { 2815 llvm::Value *Args[] = { 2816 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2817 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 2818 // Ignore return result until untied tasks are supported. 2819 auto *Result = 2820 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 2821 // if (__kmpc_cancel()) { 2822 // __kmpc_cancel_barrier(); 2823 // exit from construct; 2824 // } 2825 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2826 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 2827 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 2828 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2829 CGF.EmitBlock(ExitBB); 2830 // __kmpc_cancel_barrier(); 2831 emitBarrierCall(CGF, Loc, OMPD_unknown, /*CheckForCancel=*/false); 2832 // exit from construct; 2833 CGF.EmitBranchThroughCleanup(CancelDest); 2834 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2835 } 2836 } 2837 } 2838 2839