1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This provides a class for OpenMP runtime code generation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGOpenMPRuntime.h" 15 #include "CodeGenFunction.h" 16 #include "CGCleanup.h" 17 #include "clang/AST/Decl.h" 18 #include "clang/AST/StmtOpenMP.h" 19 #include "llvm/ADT/ArrayRef.h" 20 #include "llvm/IR/CallSite.h" 21 #include "llvm/IR/DerivedTypes.h" 22 #include "llvm/IR/GlobalValue.h" 23 #include "llvm/IR/Value.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include <cassert> 26 27 using namespace clang; 28 using namespace CodeGen; 29 30 namespace { 31 /// \brief Base class for handling code generation inside OpenMP regions. 32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 33 public: 34 /// \brief Kinds of OpenMP regions used in codegen. 35 enum CGOpenMPRegionKind { 36 /// \brief Region with outlined function for standalone 'parallel' 37 /// directive. 38 ParallelOutlinedRegion, 39 /// \brief Region with outlined function for standalone 'task' directive. 40 TaskOutlinedRegion, 41 /// \brief Region for constructs that do not require function outlining, 42 /// like 'for', 'sections', 'atomic' etc. directives. 
43 InlinedRegion, 44 }; 45 46 CGOpenMPRegionInfo(const CapturedStmt &CS, 47 const CGOpenMPRegionKind RegionKind, 48 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind) 49 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 50 CodeGen(CodeGen), Kind(Kind) {} 51 52 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 53 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind) 54 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 55 Kind(Kind) {} 56 57 /// \brief Get a variable or parameter for storing global thread id 58 /// inside OpenMP construct. 59 virtual const VarDecl *getThreadIDVariable() const = 0; 60 61 /// \brief Emit the captured statement body. 62 virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 63 64 /// \brief Get an LValue for the current ThreadID variable. 65 /// \return LValue for thread id variable. This LValue always has type int32*. 66 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 67 68 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 69 70 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 71 72 static bool classof(const CGCapturedStmtInfo *Info) { 73 return Info->getKind() == CR_OpenMP; 74 } 75 76 protected: 77 CGOpenMPRegionKind RegionKind; 78 const RegionCodeGenTy &CodeGen; 79 OpenMPDirectiveKind Kind; 80 }; 81 82 /// \brief API for captured statement code generation in OpenMP constructs. 83 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo { 84 public: 85 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 86 const RegionCodeGenTy &CodeGen, 87 OpenMPDirectiveKind Kind) 88 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind), 89 ThreadIDVar(ThreadIDVar) { 90 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 91 } 92 /// \brief Get a variable or parameter for storing global thread id 93 /// inside OpenMP construct. 
94 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 95 96 /// \brief Get the name of the capture helper. 97 StringRef getHelperName() const override { return ".omp_outlined."; } 98 99 static bool classof(const CGCapturedStmtInfo *Info) { 100 return CGOpenMPRegionInfo::classof(Info) && 101 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 102 ParallelOutlinedRegion; 103 } 104 105 private: 106 /// \brief A variable or parameter storing global thread id for OpenMP 107 /// constructs. 108 const VarDecl *ThreadIDVar; 109 }; 110 111 /// \brief API for captured statement code generation in OpenMP constructs. 112 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo { 113 public: 114 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 115 const VarDecl *ThreadIDVar, 116 const RegionCodeGenTy &CodeGen, 117 OpenMPDirectiveKind Kind) 118 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind), 119 ThreadIDVar(ThreadIDVar) { 120 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 121 } 122 /// \brief Get a variable or parameter for storing global thread id 123 /// inside OpenMP construct. 124 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 125 126 /// \brief Get an LValue for the current ThreadID variable. 127 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 128 129 /// \brief Get the name of the capture helper. 130 StringRef getHelperName() const override { return ".omp_outlined."; } 131 132 static bool classof(const CGCapturedStmtInfo *Info) { 133 return CGOpenMPRegionInfo::classof(Info) && 134 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 135 TaskOutlinedRegion; 136 } 137 138 private: 139 /// \brief A variable or parameter storing global thread id for OpenMP 140 /// constructs. 141 const VarDecl *ThreadIDVar; 142 }; 143 144 /// \brief API for inlined captured statement code generation in OpenMP 145 /// constructs. 
146 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 147 public: 148 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 149 const RegionCodeGenTy &CodeGen, 150 OpenMPDirectiveKind Kind) 151 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind), OldCSI(OldCSI), 152 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 153 // \brief Retrieve the value of the context parameter. 154 llvm::Value *getContextValue() const override { 155 if (OuterRegionInfo) 156 return OuterRegionInfo->getContextValue(); 157 llvm_unreachable("No context value for inlined OpenMP region"); 158 } 159 virtual void setContextValue(llvm::Value *V) override { 160 if (OuterRegionInfo) { 161 OuterRegionInfo->setContextValue(V); 162 return; 163 } 164 llvm_unreachable("No context value for inlined OpenMP region"); 165 } 166 /// \brief Lookup the captured field decl for a variable. 167 const FieldDecl *lookup(const VarDecl *VD) const override { 168 if (OuterRegionInfo) 169 return OuterRegionInfo->lookup(VD); 170 // If there is no outer outlined region,no need to lookup in a list of 171 // captured variables, we can use the original one. 172 return nullptr; 173 } 174 FieldDecl *getThisFieldDecl() const override { 175 if (OuterRegionInfo) 176 return OuterRegionInfo->getThisFieldDecl(); 177 return nullptr; 178 } 179 /// \brief Get a variable or parameter for storing global thread id 180 /// inside OpenMP construct. 181 const VarDecl *getThreadIDVariable() const override { 182 if (OuterRegionInfo) 183 return OuterRegionInfo->getThreadIDVariable(); 184 return nullptr; 185 } 186 187 /// \brief Get the name of the capture helper. 
188 StringRef getHelperName() const override { 189 if (auto *OuterRegionInfo = getOldCSI()) 190 return OuterRegionInfo->getHelperName(); 191 llvm_unreachable("No helper name for inlined OpenMP construct"); 192 } 193 194 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 195 196 static bool classof(const CGCapturedStmtInfo *Info) { 197 return CGOpenMPRegionInfo::classof(Info) && 198 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 199 } 200 201 private: 202 /// \brief CodeGen info about outer OpenMP region. 203 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 204 CGOpenMPRegionInfo *OuterRegionInfo; 205 }; 206 207 /// \brief RAII for emitting code of OpenMP constructs. 208 class InlinedOpenMPRegionRAII { 209 CodeGenFunction &CGF; 210 211 public: 212 /// \brief Constructs region for combined constructs. 213 /// \param CodeGen Code generation sequence for combined directives. Includes 214 /// a list of functions used for code generation of implicitly inlined 215 /// regions. 216 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 217 OpenMPDirectiveKind Kind) 218 : CGF(CGF) { 219 // Start emission for the construct. 220 CGF.CapturedStmtInfo = 221 new CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, CodeGen, Kind); 222 } 223 ~InlinedOpenMPRegionRAII() { 224 // Restore original CapturedStmtInfo only if we're done with code emission. 
225 auto *OldCSI = 226 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 227 delete CGF.CapturedStmtInfo; 228 CGF.CapturedStmtInfo = OldCSI; 229 } 230 }; 231 232 } // namespace 233 234 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 235 return CGF.MakeNaturalAlignAddrLValue( 236 CGF.Builder.CreateAlignedLoad( 237 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 238 CGF.PointerAlignInBytes), 239 getThreadIDVariable() 240 ->getType() 241 ->castAs<PointerType>() 242 ->getPointeeType()); 243 } 244 245 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 246 // 1.2.2 OpenMP Language Terminology 247 // Structured block - An executable statement with a single entry at the 248 // top and a single exit at the bottom. 249 // The point of exit cannot be a branch out of the structured block. 250 // longjmp() and throw() must not violate the entry/exit criteria. 251 CGF.EHStack.pushTerminate(); 252 { 253 CodeGenFunction::RunCleanupsScope Scope(CGF); 254 CodeGen(CGF); 255 } 256 CGF.EHStack.popTerminate(); 257 } 258 259 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 260 CodeGenFunction &CGF) { 261 return CGF.MakeNaturalAlignAddrLValue( 262 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 263 getThreadIDVariable()->getType()); 264 } 265 266 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) 267 : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) { 268 IdentTy = llvm::StructType::create( 269 "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, 270 CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, 271 CGM.Int8PtrTy /* psource */, nullptr); 272 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
273 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 274 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 275 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 276 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 277 } 278 279 void CGOpenMPRuntime::clear() { 280 InternalVars.clear(); 281 } 282 283 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( 284 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 285 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 286 assert(ThreadIDVar->getType()->isPointerType() && 287 "thread id variable must be of type kmp_int32 *"); 288 const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 289 CodeGenFunction CGF(CGM, true); 290 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind); 291 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 292 return CGF.GenerateCapturedStmtFunction(*CS); 293 } 294 295 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( 296 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 297 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 298 assert(!ThreadIDVar->getType()->isPointerType() && 299 "thread id variable must be of type kmp_int32 for tasks"); 300 auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 301 CodeGenFunction CGF(CGM, true); 302 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 303 InnermostKind); 304 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 305 return CGF.GenerateCapturedStmtFunction(*CS); 306 } 307 308 llvm::Value * 309 CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { 310 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); 311 if (!Entry) { 312 if (!DefaultOpenMPPSource) { 313 // Initialize default location for psource field of ident_t structure of 314 // all ident_t objects. Format is ";file;function;line;column;;". 
315 // Taken from 316 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c 317 DefaultOpenMPPSource = 318 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;"); 319 DefaultOpenMPPSource = 320 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 321 } 322 auto DefaultOpenMPLocation = new llvm::GlobalVariable( 323 CGM.getModule(), IdentTy, /*isConstant*/ true, 324 llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr); 325 DefaultOpenMPLocation->setUnnamedAddr(true); 326 327 llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true); 328 llvm::Constant *Values[] = {Zero, 329 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 330 Zero, Zero, DefaultOpenMPPSource}; 331 llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values); 332 DefaultOpenMPLocation->setInitializer(Init); 333 OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation; 334 return DefaultOpenMPLocation; 335 } 336 return Entry; 337 } 338 339 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 340 SourceLocation Loc, 341 OpenMPLocationFlags Flags) { 342 // If no debug info is generated - return global default location. 343 if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo || 344 Loc.isInvalid()) 345 return getOrCreateDefaultLocation(Flags); 346 347 assert(CGF.CurFn && "No function in current CodeGenFunction."); 348 349 llvm::Value *LocValue = nullptr; 350 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 351 if (I != OpenMPLocThreadIDMap.end()) 352 LocValue = I->second.DebugLoc; 353 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 354 // GetOpenMPThreadID was called before this routine. 
355 if (LocValue == nullptr) { 356 // Generate "ident_t .kmpc_loc.addr;" 357 llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr"); 358 AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy)); 359 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 360 Elem.second.DebugLoc = AI; 361 LocValue = AI; 362 363 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 364 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 365 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 366 llvm::ConstantExpr::getSizeOf(IdentTy), 367 CGM.PointerAlignInBytes); 368 } 369 370 // char **psource = &.kmpc_loc_<flags>.addr.psource; 371 auto *PSource = CGF.Builder.CreateConstInBoundsGEP2_32(IdentTy, LocValue, 0, 372 IdentField_PSource); 373 374 auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 375 if (OMPDebugLoc == nullptr) { 376 SmallString<128> Buffer2; 377 llvm::raw_svector_ostream OS2(Buffer2); 378 // Build debug location 379 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 380 OS2 << ";" << PLoc.getFilename() << ";"; 381 if (const FunctionDecl *FD = 382 dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) { 383 OS2 << FD->getQualifiedNameAsString(); 384 } 385 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 386 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 387 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 388 } 389 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 390 CGF.Builder.CreateStore(OMPDebugLoc, PSource); 391 392 return LocValue; 393 } 394 395 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 396 SourceLocation Loc) { 397 assert(CGF.CurFn && "No function in current CodeGenFunction."); 398 399 llvm::Value *ThreadID = nullptr; 400 // Check whether we've already cached a load of the thread id in this 401 // function. 
402 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 403 if (I != OpenMPLocThreadIDMap.end()) { 404 ThreadID = I->second.ThreadID; 405 if (ThreadID != nullptr) 406 return ThreadID; 407 } 408 if (auto OMPRegionInfo = 409 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 410 if (OMPRegionInfo->getThreadIDVariable()) { 411 // Check if this an outlined function with thread id passed as argument. 412 auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 413 ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); 414 // If value loaded in entry block, cache it and use it everywhere in 415 // function. 416 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 417 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 418 Elem.second.ThreadID = ThreadID; 419 } 420 return ThreadID; 421 } 422 } 423 424 // This is not an outlined function region - need to call __kmpc_int32 425 // kmpc_global_thread_num(ident_t *loc). 426 // Generate thread id value and cache this value for use across the 427 // function. 
428 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 429 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 430 ThreadID = 431 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 432 emitUpdateLocation(CGF, Loc)); 433 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 434 Elem.second.ThreadID = ThreadID; 435 return ThreadID; 436 } 437 438 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 439 assert(CGF.CurFn && "No function in current CodeGenFunction."); 440 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 441 OpenMPLocThreadIDMap.erase(CGF.CurFn); 442 } 443 444 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 445 return llvm::PointerType::getUnqual(IdentTy); 446 } 447 448 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 449 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 450 } 451 452 llvm::Constant * 453 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { 454 llvm::Constant *RTLFn = nullptr; 455 switch (Function) { 456 case OMPRTL__kmpc_fork_call: { 457 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 458 // microtask, ...); 459 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 460 getKmpc_MicroPointerTy()}; 461 llvm::FunctionType *FnTy = 462 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 463 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 464 break; 465 } 466 case OMPRTL__kmpc_global_thread_num: { 467 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 468 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 469 llvm::FunctionType *FnTy = 470 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 471 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 472 break; 473 } 474 case OMPRTL__kmpc_threadprivate_cached: { 475 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 476 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 477 llvm::Type *TypeParams[] = 
{getIdentTyPointerTy(), CGM.Int32Ty, 478 CGM.VoidPtrTy, CGM.SizeTy, 479 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 480 llvm::FunctionType *FnTy = 481 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 482 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 483 break; 484 } 485 case OMPRTL__kmpc_critical: { 486 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 487 // kmp_critical_name *crit); 488 llvm::Type *TypeParams[] = { 489 getIdentTyPointerTy(), CGM.Int32Ty, 490 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 491 llvm::FunctionType *FnTy = 492 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 493 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 494 break; 495 } 496 case OMPRTL__kmpc_threadprivate_register: { 497 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 498 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 499 // typedef void *(*kmpc_ctor)(void *); 500 auto KmpcCtorTy = 501 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 502 /*isVarArg*/ false)->getPointerTo(); 503 // typedef void *(*kmpc_cctor)(void *, void *); 504 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 505 auto KmpcCopyCtorTy = 506 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 507 /*isVarArg*/ false)->getPointerTo(); 508 // typedef void (*kmpc_dtor)(void *); 509 auto KmpcDtorTy = 510 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 511 ->getPointerTo(); 512 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 513 KmpcCopyCtorTy, KmpcDtorTy}; 514 auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 515 /*isVarArg*/ false); 516 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 517 break; 518 } 519 case OMPRTL__kmpc_end_critical: { 520 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 521 // kmp_critical_name *crit); 522 llvm::Type *TypeParams[] = { 
523 getIdentTyPointerTy(), CGM.Int32Ty, 524 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 525 llvm::FunctionType *FnTy = 526 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 527 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 528 break; 529 } 530 case OMPRTL__kmpc_cancel_barrier: { 531 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 532 // global_tid); 533 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 534 llvm::FunctionType *FnTy = 535 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 536 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 537 break; 538 } 539 case OMPRTL__kmpc_barrier: { 540 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 541 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 542 llvm::FunctionType *FnTy = 543 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 544 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 545 break; 546 } 547 case OMPRTL__kmpc_for_static_fini: { 548 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 549 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 550 llvm::FunctionType *FnTy = 551 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 552 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 553 break; 554 } 555 case OMPRTL__kmpc_push_num_threads: { 556 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 557 // kmp_int32 num_threads) 558 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 559 CGM.Int32Ty}; 560 llvm::FunctionType *FnTy = 561 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 562 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 563 break; 564 } 565 case OMPRTL__kmpc_serialized_parallel: { 566 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 567 // global_tid); 568 
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 569 llvm::FunctionType *FnTy = 570 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 571 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 572 break; 573 } 574 case OMPRTL__kmpc_end_serialized_parallel: { 575 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 576 // global_tid); 577 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 578 llvm::FunctionType *FnTy = 579 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 580 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 581 break; 582 } 583 case OMPRTL__kmpc_flush: { 584 // Build void __kmpc_flush(ident_t *loc); 585 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 586 llvm::FunctionType *FnTy = 587 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 588 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 589 break; 590 } 591 case OMPRTL__kmpc_master: { 592 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 593 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 594 llvm::FunctionType *FnTy = 595 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 596 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 597 break; 598 } 599 case OMPRTL__kmpc_end_master: { 600 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 601 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 602 llvm::FunctionType *FnTy = 603 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 604 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 605 break; 606 } 607 case OMPRTL__kmpc_omp_taskyield: { 608 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 609 // int end_part); 610 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 611 llvm::FunctionType *FnTy = 612 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 613 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 614 break; 615 } 616 case OMPRTL__kmpc_single: { 617 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 618 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 619 llvm::FunctionType *FnTy = 620 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 621 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 622 break; 623 } 624 case OMPRTL__kmpc_end_single: { 625 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 626 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 627 llvm::FunctionType *FnTy = 628 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 629 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 630 break; 631 } 632 case OMPRTL__kmpc_omp_task_alloc: { 633 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 634 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 635 // kmp_routine_entry_t *task_entry); 636 assert(KmpRoutineEntryPtrTy != nullptr && 637 "Type kmp_routine_entry_t must be created."); 638 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 639 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 640 // Return void * and then cast to particular kmp_task_t type. 
641 llvm::FunctionType *FnTy = 642 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 643 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 644 break; 645 } 646 case OMPRTL__kmpc_omp_task: { 647 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 648 // *new_task); 649 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 650 CGM.VoidPtrTy}; 651 llvm::FunctionType *FnTy = 652 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 653 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 654 break; 655 } 656 case OMPRTL__kmpc_copyprivate: { 657 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 658 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 659 // kmp_int32 didit); 660 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 661 auto *CpyFnTy = 662 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 663 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 664 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 665 CGM.Int32Ty}; 666 llvm::FunctionType *FnTy = 667 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 668 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 669 break; 670 } 671 case OMPRTL__kmpc_reduce: { 672 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 673 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 674 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 675 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 676 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 677 /*isVarArg=*/false); 678 llvm::Type *TypeParams[] = { 679 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 680 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 681 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 682 llvm::FunctionType *FnTy = 683 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 684 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 685 break; 686 } 687 case OMPRTL__kmpc_reduce_nowait: { 688 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 689 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 690 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 691 // *lck); 692 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 693 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 694 /*isVarArg=*/false); 695 llvm::Type *TypeParams[] = { 696 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 697 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 698 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 699 llvm::FunctionType *FnTy = 700 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 701 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 702 break; 703 } 704 case OMPRTL__kmpc_end_reduce: { 705 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 706 // kmp_critical_name *lck); 707 llvm::Type *TypeParams[] = { 708 getIdentTyPointerTy(), CGM.Int32Ty, 709 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 710 llvm::FunctionType *FnTy = 711 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 712 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 713 break; 714 } 715 case OMPRTL__kmpc_end_reduce_nowait: { 716 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 717 // kmp_critical_name *lck); 718 llvm::Type *TypeParams[] = { 719 getIdentTyPointerTy(), CGM.Int32Ty, 720 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 721 llvm::FunctionType *FnTy = 722 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 723 RTLFn = 724 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 725 break; 726 } 727 case OMPRTL__kmpc_omp_task_begin_if0: { 728 
// Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 729 // *new_task); 730 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 731 CGM.VoidPtrTy}; 732 llvm::FunctionType *FnTy = 733 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 734 RTLFn = 735 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 736 break; 737 } 738 case OMPRTL__kmpc_omp_task_complete_if0: { 739 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 740 // *new_task); 741 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 742 CGM.VoidPtrTy}; 743 llvm::FunctionType *FnTy = 744 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 745 RTLFn = CGM.CreateRuntimeFunction(FnTy, 746 /*Name=*/"__kmpc_omp_task_complete_if0"); 747 break; 748 } 749 case OMPRTL__kmpc_ordered: { 750 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 751 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 752 llvm::FunctionType *FnTy = 753 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 754 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 755 break; 756 } 757 case OMPRTL__kmpc_end_ordered: { 758 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 759 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 760 llvm::FunctionType *FnTy = 761 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 762 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 763 break; 764 } 765 case OMPRTL__kmpc_omp_taskwait: { 766 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 767 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 768 llvm::FunctionType *FnTy = 769 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 770 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 771 break; 772 } 773 case OMPRTL__kmpc_taskgroup: { 774 // Build void __kmpc_taskgroup(ident_t 
*loc, kmp_int32 global_tid); 775 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 776 llvm::FunctionType *FnTy = 777 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 778 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 779 break; 780 } 781 case OMPRTL__kmpc_end_taskgroup: { 782 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 783 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 784 llvm::FunctionType *FnTy = 785 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 786 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 787 break; 788 } 789 case OMPRTL__kmpc_push_proc_bind: { 790 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 791 // int proc_bind) 792 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 793 llvm::FunctionType *FnTy = 794 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 795 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 796 break; 797 } 798 case OMPRTL__kmpc_omp_task_with_deps: { 799 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 800 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 801 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 802 llvm::Type *TypeParams[] = { 803 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 804 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 805 llvm::FunctionType *FnTy = 806 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 807 RTLFn = 808 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 809 break; 810 } 811 case OMPRTL__kmpc_omp_wait_deps: { 812 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 813 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 814 // kmp_depend_info_t *noalias_dep_list); 815 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 816 
CGM.Int32Ty, CGM.VoidPtrTy, 817 CGM.Int32Ty, CGM.VoidPtrTy}; 818 llvm::FunctionType *FnTy = 819 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 820 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 821 break; 822 } 823 case OMPRTL__kmpc_cancellationpoint: { 824 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 825 // global_tid, kmp_int32 cncl_kind) 826 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 827 llvm::FunctionType *FnTy = 828 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 829 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 830 break; 831 } 832 case OMPRTL__kmpc_cancel: { 833 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 834 // kmp_int32 cncl_kind) 835 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 836 llvm::FunctionType *FnTy = 837 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 838 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 839 break; 840 } 841 } 842 return RTLFn; 843 } 844 845 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, 846 bool IVSigned) { 847 assert((IVSize == 32 || IVSize == 64) && 848 "IV size is not compatible with the omp runtime"); 849 auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 850 : "__kmpc_for_static_init_4u") 851 : (IVSigned ? "__kmpc_for_static_init_8" 852 : "__kmpc_for_static_init_8u"); 853 auto ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 854 auto PtrTy = llvm::PointerType::getUnqual(ITy); 855 llvm::Type *TypeParams[] = { 856 getIdentTyPointerTy(), // loc 857 CGM.Int32Ty, // tid 858 CGM.Int32Ty, // schedtype 859 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 860 PtrTy, // p_lower 861 PtrTy, // p_upper 862 PtrTy, // p_stride 863 ITy, // incr 864 ITy // chunk 865 }; 866 llvm::FunctionType *FnTy = 867 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 868 return CGM.CreateRuntimeFunction(FnTy, Name); 869 } 870 871 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, 872 bool IVSigned) { 873 assert((IVSize == 32 || IVSize == 64) && 874 "IV size is not compatible with the omp runtime"); 875 auto Name = 876 IVSize == 32 877 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 878 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 879 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 880 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 881 CGM.Int32Ty, // tid 882 CGM.Int32Ty, // schedtype 883 ITy, // lower 884 ITy, // upper 885 ITy, // stride 886 ITy // chunk 887 }; 888 llvm::FunctionType *FnTy = 889 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 890 return CGM.CreateRuntimeFunction(FnTy, Name); 891 } 892 893 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, 894 bool IVSigned) { 895 assert((IVSize == 32 || IVSize == 64) && 896 "IV size is not compatible with the omp runtime"); 897 auto Name = 898 IVSize == 32 899 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 900 : (IVSigned ? 
"__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 901 llvm::Type *TypeParams[] = { 902 getIdentTyPointerTy(), // loc 903 CGM.Int32Ty, // tid 904 }; 905 llvm::FunctionType *FnTy = 906 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 907 return CGM.CreateRuntimeFunction(FnTy, Name); 908 } 909 910 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, 911 bool IVSigned) { 912 assert((IVSize == 32 || IVSize == 64) && 913 "IV size is not compatible with the omp runtime"); 914 auto Name = 915 IVSize == 32 916 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 917 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 918 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 919 auto PtrTy = llvm::PointerType::getUnqual(ITy); 920 llvm::Type *TypeParams[] = { 921 getIdentTyPointerTy(), // loc 922 CGM.Int32Ty, // tid 923 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 924 PtrTy, // p_lower 925 PtrTy, // p_upper 926 PtrTy // p_stride 927 }; 928 llvm::FunctionType *FnTy = 929 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 930 return CGM.CreateRuntimeFunction(FnTy, Name); 931 } 932 933 llvm::Constant * 934 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 935 assert(!CGM.getLangOpts().OpenMPUseTLS || 936 !CGM.getContext().getTargetInfo().isTLSSupported()); 937 // Lookup the entry, lazily creating it if necessary. 
938 return getOrCreateInternalVariable(CGM.Int8PtrPtrTy, 939 Twine(CGM.getMangledName(VD)) + ".cache."); 940 } 941 942 llvm::Value *CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 943 const VarDecl *VD, 944 llvm::Value *VDAddr, 945 SourceLocation Loc) { 946 if (CGM.getLangOpts().OpenMPUseTLS && 947 CGM.getContext().getTargetInfo().isTLSSupported()) 948 return VDAddr; 949 950 auto VarTy = VDAddr->getType()->getPointerElementType(); 951 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 952 CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy), 953 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 954 getOrCreateThreadPrivateCache(VD)}; 955 return CGF.EmitRuntimeCall( 956 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args); 957 } 958 959 void CGOpenMPRuntime::emitThreadPrivateVarInit( 960 CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor, 961 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 962 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 963 // library. 964 auto OMPLoc = emitUpdateLocation(CGF, Loc); 965 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 966 OMPLoc); 967 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 968 // to register constructor/destructor for variable. 
969 llvm::Value *Args[] = {OMPLoc, 970 CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy), 971 Ctor, CopyCtor, Dtor}; 972 CGF.EmitRuntimeCall( 973 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 974 } 975 976 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 977 const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc, 978 bool PerformInit, CodeGenFunction *CGF) { 979 if (CGM.getLangOpts().OpenMPUseTLS && 980 CGM.getContext().getTargetInfo().isTLSSupported()) 981 return nullptr; 982 983 VD = VD->getDefinition(CGM.getContext()); 984 if (VD && ThreadPrivateWithDefinition.count(VD) == 0) { 985 ThreadPrivateWithDefinition.insert(VD); 986 QualType ASTTy = VD->getType(); 987 988 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 989 auto Init = VD->getAnyInitializer(); 990 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 991 // Generate function that re-emits the declaration's initializer into the 992 // threadprivate copy of the variable VD 993 CodeGenFunction CtorCGF(CGM); 994 FunctionArgList Args; 995 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 996 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 997 Args.push_back(&Dst); 998 999 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( 1000 CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(), 1001 /*isVariadic=*/false); 1002 auto FTy = CGM.getTypes().GetFunctionType(FI); 1003 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 1004 FTy, ".__kmpc_global_ctor_.", Loc); 1005 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1006 Args, SourceLocation()); 1007 auto ArgVal = CtorCGF.EmitLoadOfScalar( 1008 CtorCGF.GetAddrOfLocalVar(&Dst), 1009 /*Volatile=*/false, CGM.PointerAlignInBytes, 1010 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1011 auto Arg = CtorCGF.Builder.CreatePointerCast( 1012 ArgVal, 1013 CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy))); 1014 CtorCGF.EmitAnyExprToMem(Init, Arg, 
Init->getType().getQualifiers(), 1015 /*IsInitializer=*/true); 1016 ArgVal = CtorCGF.EmitLoadOfScalar( 1017 CtorCGF.GetAddrOfLocalVar(&Dst), 1018 /*Volatile=*/false, CGM.PointerAlignInBytes, 1019 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1020 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1021 CtorCGF.FinishFunction(); 1022 Ctor = Fn; 1023 } 1024 if (VD->getType().isDestructedType() != QualType::DK_none) { 1025 // Generate function that emits destructor call for the threadprivate copy 1026 // of the variable VD 1027 CodeGenFunction DtorCGF(CGM); 1028 FunctionArgList Args; 1029 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 1030 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 1031 Args.push_back(&Dst); 1032 1033 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( 1034 CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(), 1035 /*isVariadic=*/false); 1036 auto FTy = CGM.getTypes().GetFunctionType(FI); 1037 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 1038 FTy, ".__kmpc_global_dtor_.", Loc); 1039 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1040 SourceLocation()); 1041 auto ArgVal = DtorCGF.EmitLoadOfScalar( 1042 DtorCGF.GetAddrOfLocalVar(&Dst), 1043 /*Volatile=*/false, CGM.PointerAlignInBytes, 1044 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1045 DtorCGF.emitDestroy(ArgVal, ASTTy, 1046 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1047 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1048 DtorCGF.FinishFunction(); 1049 Dtor = Fn; 1050 } 1051 // Do not emit init function if it is not required. 1052 if (!Ctor && !Dtor) 1053 return nullptr; 1054 1055 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1056 auto CopyCtorTy = 1057 llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1058 /*isVarArg=*/false)->getPointerTo(); 1059 // Copying constructor for the threadprivate variable. 
1060 // Must be NULL - reserved by runtime, but currently it requires that this 1061 // parameter is always NULL. Otherwise it fires assertion. 1062 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1063 if (Ctor == nullptr) { 1064 auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1065 /*isVarArg=*/false)->getPointerTo(); 1066 Ctor = llvm::Constant::getNullValue(CtorTy); 1067 } 1068 if (Dtor == nullptr) { 1069 auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1070 /*isVarArg=*/false)->getPointerTo(); 1071 Dtor = llvm::Constant::getNullValue(DtorTy); 1072 } 1073 if (!CGF) { 1074 auto InitFunctionTy = 1075 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1076 auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( 1077 InitFunctionTy, ".__omp_threadprivate_init_."); 1078 CodeGenFunction InitCGF(CGM); 1079 FunctionArgList ArgList; 1080 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1081 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1082 Loc); 1083 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1084 InitCGF.FinishFunction(); 1085 return InitFunction; 1086 } 1087 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1088 } 1089 return nullptr; 1090 } 1091 1092 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen 1093 /// function. Here is the logic: 1094 /// if (Cond) { 1095 /// ThenGen(); 1096 /// } else { 1097 /// ElseGen(); 1098 /// } 1099 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 1100 const RegionCodeGenTy &ThenGen, 1101 const RegionCodeGenTy &ElseGen) { 1102 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 1103 1104 // If the condition constant folds and can be elided, try to avoid emitting 1105 // the condition and the dead arm of the if/else. 
1106 bool CondConstant; 1107 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 1108 CodeGenFunction::RunCleanupsScope Scope(CGF); 1109 if (CondConstant) { 1110 ThenGen(CGF); 1111 } else { 1112 ElseGen(CGF); 1113 } 1114 return; 1115 } 1116 1117 // Otherwise, the condition did not fold, or we couldn't elide it. Just 1118 // emit the conditional branch. 1119 auto ThenBlock = CGF.createBasicBlock("omp_if.then"); 1120 auto ElseBlock = CGF.createBasicBlock("omp_if.else"); 1121 auto ContBlock = CGF.createBasicBlock("omp_if.end"); 1122 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 1123 1124 // Emit the 'then' code. 1125 CGF.EmitBlock(ThenBlock); 1126 { 1127 CodeGenFunction::RunCleanupsScope ThenScope(CGF); 1128 ThenGen(CGF); 1129 } 1130 CGF.EmitBranch(ContBlock); 1131 // Emit the 'else' code if present. 1132 { 1133 // There is no need to emit line number for unconditional branch. 1134 auto NL = ApplyDebugLocation::CreateEmpty(CGF); 1135 CGF.EmitBlock(ElseBlock); 1136 } 1137 { 1138 CodeGenFunction::RunCleanupsScope ThenScope(CGF); 1139 ElseGen(CGF); 1140 } 1141 { 1142 // There is no need to emit line number for unconditional branch. 1143 auto NL = ApplyDebugLocation::CreateEmpty(CGF); 1144 CGF.EmitBranch(ContBlock); 1145 } 1146 // Emit the continuation block for code after the if. 
1147 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 1148 } 1149 1150 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 1151 llvm::Value *OutlinedFn, 1152 llvm::Value *CapturedStruct, 1153 const Expr *IfCond) { 1154 auto *RTLoc = emitUpdateLocation(CGF, Loc); 1155 auto &&ThenGen = 1156 [this, OutlinedFn, CapturedStruct, RTLoc](CodeGenFunction &CGF) { 1157 // Build call __kmpc_fork_call(loc, 1, microtask, 1158 // captured_struct/*context*/) 1159 llvm::Value *Args[] = { 1160 RTLoc, 1161 CGF.Builder.getInt32( 1162 1), // Number of arguments after 'microtask' argument 1163 // (there is only one additional argument - 'context') 1164 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()), 1165 CGF.EmitCastToVoidPtr(CapturedStruct)}; 1166 auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call); 1167 CGF.EmitRuntimeCall(RTLFn, Args); 1168 }; 1169 auto &&ElseGen = [this, OutlinedFn, CapturedStruct, RTLoc, Loc]( 1170 CodeGenFunction &CGF) { 1171 auto ThreadID = getThreadID(CGF, Loc); 1172 // Build calls: 1173 // __kmpc_serialized_parallel(&Loc, GTid); 1174 llvm::Value *Args[] = {RTLoc, ThreadID}; 1175 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), 1176 Args); 1177 1178 // OutlinedFn(>id, &zero, CapturedStruct); 1179 auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc); 1180 auto Int32Ty = CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, 1181 /*Signed*/ true); 1182 auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr"); 1183 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 1184 llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct}; 1185 CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); 1186 1187 // __kmpc_end_serialized_parallel(&Loc, GTid); 1188 llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID}; 1189 CGF.EmitRuntimeCall( 1190 createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs); 1191 }; 1192 if (IfCond) { 1193 
emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 1194 } else { 1195 CodeGenFunction::RunCleanupsScope Scope(CGF); 1196 ThenGen(CGF); 1197 } 1198 } 1199 1200 // If we're inside an (outlined) parallel region, use the region info's 1201 // thread-ID variable (it is passed in a first argument of the outlined function 1202 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 1203 // regular serial code region, get thread ID by calling kmp_int32 1204 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 1205 // return the address of that temp. 1206 llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 1207 SourceLocation Loc) { 1208 if (auto OMPRegionInfo = 1209 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 1210 if (OMPRegionInfo->getThreadIDVariable()) 1211 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 1212 1213 auto ThreadID = getThreadID(CGF, Loc); 1214 auto Int32Ty = 1215 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 1216 auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 1217 CGF.EmitStoreOfScalar(ThreadID, 1218 CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty)); 1219 1220 return ThreadIDTemp; 1221 } 1222 1223 llvm::Constant * 1224 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, 1225 const llvm::Twine &Name) { 1226 SmallString<256> Buffer; 1227 llvm::raw_svector_ostream Out(Buffer); 1228 Out << Name; 1229 auto RuntimeName = Out.str(); 1230 auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first; 1231 if (Elem.second) { 1232 assert(Elem.second->getType()->getPointerElementType() == Ty && 1233 "OMP internal variable has different type than requested"); 1234 return &*Elem.second; 1235 } 1236 1237 return Elem.second = new llvm::GlobalVariable( 1238 CGM.getModule(), Ty, /*IsConstant*/ false, 1239 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 1240 
Elem.first()); 1241 } 1242 1243 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 1244 llvm::Twine Name(".gomp_critical_user_", CriticalName); 1245 return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); 1246 } 1247 1248 namespace { 1249 template <size_t N> class CallEndCleanup : public EHScopeStack::Cleanup { 1250 llvm::Value *Callee; 1251 llvm::Value *Args[N]; 1252 1253 public: 1254 CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs) 1255 : Callee(Callee) { 1256 assert(CleanupArgs.size() == N); 1257 std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args)); 1258 } 1259 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 1260 CGF.EmitRuntimeCall(Callee, Args); 1261 } 1262 }; 1263 } // namespace 1264 1265 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 1266 StringRef CriticalName, 1267 const RegionCodeGenTy &CriticalOpGen, 1268 SourceLocation Loc) { 1269 // __kmpc_critical(ident_t *, gtid, Lock); 1270 // CriticalOpGen(); 1271 // __kmpc_end_critical(ident_t *, gtid, Lock); 1272 // Prepare arguments and build a call to __kmpc_critical 1273 { 1274 CodeGenFunction::RunCleanupsScope Scope(CGF); 1275 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1276 getCriticalRegionLock(CriticalName)}; 1277 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args); 1278 // Build a call to __kmpc_end_critical 1279 CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( 1280 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical), 1281 llvm::makeArrayRef(Args)); 1282 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 1283 } 1284 } 1285 1286 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond, 1287 OpenMPDirectiveKind Kind, 1288 const RegionCodeGenTy &BodyOpGen) { 1289 llvm::Value *CallBool = CGF.EmitScalarConversion( 1290 IfCond, 1291 CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, 
/*Signed=*/true), 1292 CGF.getContext().BoolTy); 1293 1294 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 1295 auto *ContBlock = CGF.createBasicBlock("omp_if.end"); 1296 // Generate the branch (If-stmt) 1297 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 1298 CGF.EmitBlock(ThenBlock); 1299 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen); 1300 // Emit the rest of bblocks/branches 1301 CGF.EmitBranch(ContBlock); 1302 CGF.EmitBlock(ContBlock, true); 1303 } 1304 1305 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 1306 const RegionCodeGenTy &MasterOpGen, 1307 SourceLocation Loc) { 1308 // if(__kmpc_master(ident_t *, gtid)) { 1309 // MasterOpGen(); 1310 // __kmpc_end_master(ident_t *, gtid); 1311 // } 1312 // Prepare arguments and build a call to __kmpc_master 1313 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1314 auto *IsMaster = 1315 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args); 1316 typedef CallEndCleanup<std::extent<decltype(Args)>::value> 1317 MasterCallEndCleanup; 1318 emitIfStmt(CGF, IsMaster, OMPD_master, [&](CodeGenFunction &CGF) -> void { 1319 CodeGenFunction::RunCleanupsScope Scope(CGF); 1320 CGF.EHStack.pushCleanup<MasterCallEndCleanup>( 1321 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master), 1322 llvm::makeArrayRef(Args)); 1323 MasterOpGen(CGF); 1324 }); 1325 } 1326 1327 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 1328 SourceLocation Loc) { 1329 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 1330 llvm::Value *Args[] = { 1331 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1332 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 1333 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 1334 } 1335 1336 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 1337 const RegionCodeGenTy &TaskgroupOpGen, 1338 SourceLocation Loc) { 1339 // 
__kmpc_taskgroup(ident_t *, gtid); 1340 // TaskgroupOpGen(); 1341 // __kmpc_end_taskgroup(ident_t *, gtid); 1342 // Prepare arguments and build a call to __kmpc_taskgroup 1343 { 1344 CodeGenFunction::RunCleanupsScope Scope(CGF); 1345 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1346 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args); 1347 // Build a call to __kmpc_end_taskgroup 1348 CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( 1349 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 1350 llvm::makeArrayRef(Args)); 1351 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 1352 } 1353 } 1354 1355 static llvm::Value *emitCopyprivateCopyFunction( 1356 CodeGenModule &CGM, llvm::Type *ArgsType, 1357 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 1358 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) { 1359 auto &C = CGM.getContext(); 1360 // void copy_func(void *LHSArg, void *RHSArg); 1361 FunctionArgList Args; 1362 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 1363 C.VoidPtrTy); 1364 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 1365 C.VoidPtrTy); 1366 Args.push_back(&LHSArg); 1367 Args.push_back(&RHSArg); 1368 FunctionType::ExtInfo EI; 1369 auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration( 1370 C.VoidTy, Args, EI, /*isVariadic=*/false); 1371 auto *Fn = llvm::Function::Create( 1372 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 1373 ".omp.copyprivate.copy_func", &CGM.getModule()); 1374 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn); 1375 CodeGenFunction CGF(CGM); 1376 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 1377 // Dest = (void*[n])(LHSArg); 1378 // Src = (void*[n])(RHSArg); 1379 auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1380 
CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg), 1381 CGF.PointerAlignInBytes), 1382 ArgsType); 1383 auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1384 CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg), 1385 CGF.PointerAlignInBytes), 1386 ArgsType); 1387 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 1388 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 1389 // ... 1390 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 1391 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 1392 auto *DestAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1393 CGF.Builder.CreateAlignedLoad( 1394 CGF.Builder.CreateStructGEP(nullptr, LHS, I), 1395 CGM.PointerAlignInBytes), 1396 CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType()))); 1397 auto *SrcAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1398 CGF.Builder.CreateAlignedLoad( 1399 CGF.Builder.CreateStructGEP(nullptr, RHS, I), 1400 CGM.PointerAlignInBytes), 1401 CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType()))); 1402 auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 1403 QualType Type = VD->getType(); 1404 CGF.EmitOMPCopy(CGF, Type, DestAddr, SrcAddr, 1405 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()), 1406 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()), 1407 AssignmentOps[I]); 1408 } 1409 CGF.FinishFunction(); 1410 return Fn; 1411 } 1412 1413 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 1414 const RegionCodeGenTy &SingleOpGen, 1415 SourceLocation Loc, 1416 ArrayRef<const Expr *> CopyprivateVars, 1417 ArrayRef<const Expr *> SrcExprs, 1418 ArrayRef<const Expr *> DstExprs, 1419 ArrayRef<const Expr *> AssignmentOps) { 1420 assert(CopyprivateVars.size() == SrcExprs.size() && 1421 CopyprivateVars.size() == DstExprs.size() && 1422 CopyprivateVars.size() == AssignmentOps.size()); 1423 auto &C = CGM.getContext(); 1424 // int32 did_it = 0; 1425 // if(__kmpc_single(ident_t *, gtid)) { 1426 // SingleOpGen(); 1427 // 
__kmpc_end_single(ident_t *, gtid); 1428 // did_it = 1; 1429 // } 1430 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 1431 // <copy_func>, did_it); 1432 1433 llvm::AllocaInst *DidIt = nullptr; 1434 if (!CopyprivateVars.empty()) { 1435 // int32 did_it = 0; 1436 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1437 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 1438 CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(0), DidIt, 1439 DidIt->getAlignment()); 1440 } 1441 // Prepare arguments and build a call to __kmpc_single 1442 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1443 auto *IsSingle = 1444 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args); 1445 typedef CallEndCleanup<std::extent<decltype(Args)>::value> 1446 SingleCallEndCleanup; 1447 emitIfStmt(CGF, IsSingle, OMPD_single, [&](CodeGenFunction &CGF) -> void { 1448 CodeGenFunction::RunCleanupsScope Scope(CGF); 1449 CGF.EHStack.pushCleanup<SingleCallEndCleanup>( 1450 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single), 1451 llvm::makeArrayRef(Args)); 1452 SingleOpGen(CGF); 1453 if (DidIt) { 1454 // did_it = 1; 1455 CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt, 1456 DidIt->getAlignment()); 1457 } 1458 }); 1459 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 1460 // <copy_func>, did_it); 1461 if (DidIt) { 1462 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 1463 auto CopyprivateArrayTy = 1464 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 1465 /*IndexTypeQuals=*/0); 1466 // Create a list of all private variables for copyprivate. 
1467 auto *CopyprivateList = 1468 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 1469 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 1470 auto *Elem = CGF.Builder.CreateStructGEP( 1471 CopyprivateList->getAllocatedType(), CopyprivateList, I); 1472 CGF.Builder.CreateAlignedStore( 1473 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1474 CGF.EmitLValue(CopyprivateVars[I]).getAddress(), CGF.VoidPtrTy), 1475 Elem, CGM.PointerAlignInBytes); 1476 } 1477 // Build function that copies private values from single region to all other 1478 // threads in the corresponding parallel region. 1479 auto *CpyFn = emitCopyprivateCopyFunction( 1480 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 1481 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps); 1482 auto *BufSize = llvm::ConstantInt::get( 1483 CGM.SizeTy, C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity()); 1484 auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 1485 CGF.VoidPtrTy); 1486 auto *DidItVal = 1487 CGF.Builder.CreateAlignedLoad(DidIt, CGF.PointerAlignInBytes); 1488 llvm::Value *Args[] = { 1489 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 1490 getThreadID(CGF, Loc), // i32 <gtid> 1491 BufSize, // size_t <buf_size> 1492 CL, // void *<copyprivate list> 1493 CpyFn, // void (*) (void *, void *) <copy_func> 1494 DidItVal // i32 did_it 1495 }; 1496 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 1497 } 1498 } 1499 1500 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 1501 const RegionCodeGenTy &OrderedOpGen, 1502 SourceLocation Loc) { 1503 // __kmpc_ordered(ident_t *, gtid); 1504 // OrderedOpGen(); 1505 // __kmpc_end_ordered(ident_t *, gtid); 1506 // Prepare arguments and build a call to __kmpc_ordered 1507 { 1508 CodeGenFunction::RunCleanupsScope Scope(CGF); 1509 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1510 
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args); 1511 // Build a call to __kmpc_end_ordered 1512 CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( 1513 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered), 1514 llvm::makeArrayRef(Args)); 1515 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 1516 } 1517 } 1518 1519 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 1520 OpenMPDirectiveKind Kind, 1521 bool CheckForCancel) { 1522 // Build call __kmpc_cancel_barrier(loc, thread_id); 1523 // Build call __kmpc_barrier(loc, thread_id); 1524 OpenMPLocationFlags Flags = OMP_IDENT_KMPC; 1525 if (Kind == OMPD_for) { 1526 Flags = 1527 static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR); 1528 } else if (Kind == OMPD_sections) { 1529 Flags = static_cast<OpenMPLocationFlags>(Flags | 1530 OMP_IDENT_BARRIER_IMPL_SECTIONS); 1531 } else if (Kind == OMPD_single) { 1532 Flags = 1533 static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE); 1534 } else if (Kind == OMPD_barrier) { 1535 Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL); 1536 } else { 1537 Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL); 1538 } 1539 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 1540 // thread_id); 1541 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 1542 getThreadID(CGF, Loc)}; 1543 if (auto *OMPRegionInfo = 1544 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1545 auto CancelDestination = 1546 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 1547 if (CancelDestination.isValid()) { 1548 auto *Result = CGF.EmitRuntimeCall( 1549 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 1550 if (CheckForCancel) { 1551 // if (__kmpc_cancel_barrier()) { 1552 // exit from construct; 1553 // } 1554 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 1555 auto 
*ContBB = CGF.createBasicBlock(".cancel.continue"); 1556 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 1557 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 1558 CGF.EmitBlock(ExitBB); 1559 // exit from construct; 1560 CGF.EmitBranchThroughCleanup(CancelDestination); 1561 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 1562 } 1563 return; 1564 } 1565 } 1566 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 1567 } 1568 1569 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from 1570 /// the enum sched_type in kmp.h). 1571 enum OpenMPSchedType { 1572 /// \brief Lower bound for default (unordered) versions. 1573 OMP_sch_lower = 32, 1574 OMP_sch_static_chunked = 33, 1575 OMP_sch_static = 34, 1576 OMP_sch_dynamic_chunked = 35, 1577 OMP_sch_guided_chunked = 36, 1578 OMP_sch_runtime = 37, 1579 OMP_sch_auto = 38, 1580 /// \brief Lower bound for 'ordered' versions. 1581 OMP_ord_lower = 64, 1582 OMP_ord_static_chunked = 65, 1583 OMP_ord_static = 66, 1584 OMP_ord_dynamic_chunked = 67, 1585 OMP_ord_guided_chunked = 68, 1586 OMP_ord_runtime = 69, 1587 OMP_ord_auto = 70, 1588 OMP_sch_default = OMP_sch_static, 1589 }; 1590 1591 /// \brief Map the OpenMP loop schedule to the runtime enumeration. 1592 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 1593 bool Chunked, bool Ordered) { 1594 switch (ScheduleKind) { 1595 case OMPC_SCHEDULE_static: 1596 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 1597 : (Ordered ? OMP_ord_static : OMP_sch_static); 1598 case OMPC_SCHEDULE_dynamic: 1599 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 1600 case OMPC_SCHEDULE_guided: 1601 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 1602 case OMPC_SCHEDULE_runtime: 1603 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 1604 case OMPC_SCHEDULE_auto: 1605 return Ordered ? 
OMP_ord_auto : OMP_sch_auto; 1606 case OMPC_SCHEDULE_unknown: 1607 assert(!Chunked && "chunk was specified but schedule kind not known"); 1608 return Ordered ? OMP_ord_static : OMP_sch_static; 1609 } 1610 llvm_unreachable("Unexpected runtime schedule"); 1611 } 1612 1613 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 1614 bool Chunked) const { 1615 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 1616 return Schedule == OMP_sch_static; 1617 } 1618 1619 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 1620 auto Schedule = 1621 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 1622 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 1623 return Schedule != OMP_sch_static; 1624 } 1625 1626 void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc, 1627 OpenMPScheduleClauseKind ScheduleKind, 1628 unsigned IVSize, bool IVSigned, bool Ordered, 1629 llvm::Value *IL, llvm::Value *LB, 1630 llvm::Value *UB, llvm::Value *ST, 1631 llvm::Value *Chunk) { 1632 OpenMPSchedType Schedule = 1633 getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered); 1634 if (Ordered || 1635 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 1636 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked)) { 1637 // Call __kmpc_dispatch_init( 1638 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 1639 // kmp_int[32|64] lower, kmp_int[32|64] upper, 1640 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 1641 1642 // If the Chunk was not specified in the clause - use default value 1. 
1643 if (Chunk == nullptr) 1644 Chunk = CGF.Builder.getIntN(IVSize, 1); 1645 llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1646 getThreadID(CGF, Loc), 1647 CGF.Builder.getInt32(Schedule), // Schedule type 1648 CGF.Builder.getIntN(IVSize, 0), // Lower 1649 UB, // Upper 1650 CGF.Builder.getIntN(IVSize, 1), // Stride 1651 Chunk // Chunk 1652 }; 1653 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 1654 } else { 1655 // Call __kmpc_for_static_init( 1656 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 1657 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 1658 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 1659 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 1660 if (Chunk == nullptr) { 1661 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) && 1662 "expected static non-chunked schedule"); 1663 // If the Chunk was not specified in the clause - use default value 1. 1664 Chunk = CGF.Builder.getIntN(IVSize, 1); 1665 } else 1666 assert((Schedule == OMP_sch_static_chunked || 1667 Schedule == OMP_ord_static_chunked) && 1668 "expected static chunked schedule"); 1669 llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1670 getThreadID(CGF, Loc), 1671 CGF.Builder.getInt32(Schedule), // Schedule type 1672 IL, // &isLastIter 1673 LB, // &LB 1674 UB, // &UB 1675 ST, // &Stride 1676 CGF.Builder.getIntN(IVSize, 1), // Incr 1677 Chunk // Chunk 1678 }; 1679 CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args); 1680 } 1681 } 1682 1683 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 1684 SourceLocation Loc) { 1685 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 1686 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1687 getThreadID(CGF, Loc)}; 1688 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 1689 Args); 1690 } 1691 1692 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 1693 
SourceLocation Loc, 1694 unsigned IVSize, 1695 bool IVSigned) { 1696 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 1697 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1698 getThreadID(CGF, Loc)}; 1699 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 1700 } 1701 1702 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 1703 SourceLocation Loc, unsigned IVSize, 1704 bool IVSigned, llvm::Value *IL, 1705 llvm::Value *LB, llvm::Value *UB, 1706 llvm::Value *ST) { 1707 // Call __kmpc_dispatch_next( 1708 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 1709 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 1710 // kmp_int[32|64] *p_stride); 1711 llvm::Value *Args[] = { 1712 emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc), 1713 IL, // &isLastIter 1714 LB, // &Lower 1715 UB, // &Upper 1716 ST // &Stride 1717 }; 1718 llvm::Value *Call = 1719 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 1720 return CGF.EmitScalarConversion( 1721 Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true), 1722 CGF.getContext().BoolTy); 1723 } 1724 1725 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 1726 llvm::Value *NumThreads, 1727 SourceLocation Loc) { 1728 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 1729 llvm::Value *Args[] = { 1730 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1731 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 1732 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 1733 Args); 1734 } 1735 1736 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 1737 OpenMPProcBindClauseKind ProcBind, 1738 SourceLocation Loc) { 1739 // Constants for proc bind value accepted by the runtime. 
1740 enum ProcBindTy { 1741 ProcBindFalse = 0, 1742 ProcBindTrue, 1743 ProcBindMaster, 1744 ProcBindClose, 1745 ProcBindSpread, 1746 ProcBindIntel, 1747 ProcBindDefault 1748 } RuntimeProcBind; 1749 switch (ProcBind) { 1750 case OMPC_PROC_BIND_master: 1751 RuntimeProcBind = ProcBindMaster; 1752 break; 1753 case OMPC_PROC_BIND_close: 1754 RuntimeProcBind = ProcBindClose; 1755 break; 1756 case OMPC_PROC_BIND_spread: 1757 RuntimeProcBind = ProcBindSpread; 1758 break; 1759 case OMPC_PROC_BIND_unknown: 1760 llvm_unreachable("Unsupported proc_bind value."); 1761 } 1762 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 1763 llvm::Value *Args[] = { 1764 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1765 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 1766 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 1767 } 1768 1769 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 1770 SourceLocation Loc) { 1771 // Build call void __kmpc_flush(ident_t *loc) 1772 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 1773 emitUpdateLocation(CGF, Loc)); 1774 } 1775 1776 namespace { 1777 /// \brief Indexes of fields for type kmp_task_t. 1778 enum KmpTaskTFields { 1779 /// \brief List of shared variables. 1780 KmpTaskTShareds, 1781 /// \brief Task routine. 1782 KmpTaskTRoutine, 1783 /// \brief Partition id for the untied tasks. 1784 KmpTaskTPartId, 1785 /// \brief Function with call of destructors for private variables. 1786 KmpTaskTDestructors, 1787 }; 1788 } // namespace 1789 1790 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 1791 if (!KmpRoutineEntryPtrTy) { 1792 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 
1793 auto &C = CGM.getContext(); 1794 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 1795 FunctionProtoType::ExtProtoInfo EPI; 1796 KmpRoutineEntryPtrQTy = C.getPointerType( 1797 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 1798 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 1799 } 1800 } 1801 1802 static void addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1803 QualType FieldTy) { 1804 auto *Field = FieldDecl::Create( 1805 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1806 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1807 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1808 Field->setAccess(AS_public); 1809 DC->addDecl(Field); 1810 } 1811 1812 namespace { 1813 struct PrivateHelpersTy { 1814 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 1815 const VarDecl *PrivateElemInit) 1816 : Original(Original), PrivateCopy(PrivateCopy), 1817 PrivateElemInit(PrivateElemInit) {} 1818 const VarDecl *Original; 1819 const VarDecl *PrivateCopy; 1820 const VarDecl *PrivateElemInit; 1821 }; 1822 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 1823 } // namespace 1824 1825 static RecordDecl * 1826 createPrivatesRecordDecl(CodeGenModule &CGM, 1827 const ArrayRef<PrivateDataTy> Privates) { 1828 if (!Privates.empty()) { 1829 auto &C = CGM.getContext(); 1830 // Build struct .kmp_privates_t. 
{ 1831 // /* private vars */ 1832 // }; 1833 auto *RD = C.buildImplicitRecord(".kmp_privates.t"); 1834 RD->startDefinition(); 1835 for (auto &&Pair : Privates) { 1836 auto Type = Pair.second.Original->getType(); 1837 Type = Type.getNonReferenceType(); 1838 addFieldToRecordDecl(C, RD, Type); 1839 } 1840 RD->completeDefinition(); 1841 return RD; 1842 } 1843 return nullptr; 1844 } 1845 1846 static RecordDecl * 1847 createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty, 1848 QualType KmpRoutineEntryPointerQTy) { 1849 auto &C = CGM.getContext(); 1850 // Build struct kmp_task_t { 1851 // void * shareds; 1852 // kmp_routine_entry_t routine; 1853 // kmp_int32 part_id; 1854 // kmp_routine_entry_t destructors; 1855 // }; 1856 auto *RD = C.buildImplicitRecord("kmp_task_t"); 1857 RD->startDefinition(); 1858 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1859 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 1860 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1861 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 1862 RD->completeDefinition(); 1863 return RD; 1864 } 1865 1866 static RecordDecl * 1867 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 1868 const ArrayRef<PrivateDataTy> Privates) { 1869 auto &C = CGM.getContext(); 1870 // Build struct kmp_task_t_with_privates { 1871 // kmp_task_t task_data; 1872 // .kmp_privates_t. privates; 1873 // }; 1874 auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 1875 RD->startDefinition(); 1876 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 1877 if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) { 1878 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 1879 } 1880 RD->completeDefinition(); 1881 return RD; 1882 } 1883 1884 /// \brief Emit a proxy function which accepts kmp_task_t as the second 1885 /// argument. 
1886 /// \code 1887 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 1888 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, 1889 /// tt->shareds); 1890 /// return 0; 1891 /// } 1892 /// \endcode 1893 static llvm::Value * 1894 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 1895 QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, 1896 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 1897 QualType SharedsPtrTy, llvm::Value *TaskFunction, 1898 llvm::Value *TaskPrivatesMap) { 1899 auto &C = CGM.getContext(); 1900 FunctionArgList Args; 1901 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); 1902 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, 1903 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); 1904 Args.push_back(&GtidArg); 1905 Args.push_back(&TaskTypeArg); 1906 FunctionType::ExtInfo Info; 1907 auto &TaskEntryFnInfo = 1908 CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info, 1909 /*isVariadic=*/false); 1910 auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 1911 auto *TaskEntry = 1912 llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage, 1913 ".omp_task_entry.", &CGM.getModule()); 1914 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry); 1915 CodeGenFunction CGF(CGM); 1916 CGF.disableDebugInfo(); 1917 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); 1918 1919 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 1920 // tt->task_data.shareds); 1921 auto *GtidParam = CGF.EmitLoadOfScalar( 1922 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, 1923 C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc); 1924 auto *TaskTypeArgAddr = CGF.Builder.CreateAlignedLoad( 1925 CGF.GetAddrOfLocalVar(&TaskTypeArg), CGM.PointerAlignInBytes); 1926 LValue TDBase = 1927 CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy); 1928 auto 
*KmpTaskTWithPrivatesQTyRD = 1929 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 1930 LValue Base = 1931 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 1932 auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 1933 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 1934 auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 1935 auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal(); 1936 1937 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 1938 auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 1939 auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1940 CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(), 1941 CGF.ConvertTypeForMem(SharedsPtrTy)); 1942 1943 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 1944 llvm::Value *PrivatesParam; 1945 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 1946 auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 1947 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1948 PrivatesLVal.getAddress(), CGF.VoidPtrTy); 1949 } else { 1950 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 1951 } 1952 1953 llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam, 1954 TaskPrivatesMap, SharedsParam}; 1955 CGF.EmitCallOrInvoke(TaskFunction, CallArgs); 1956 CGF.EmitStoreThroughLValue( 1957 RValue::get(CGF.Builder.getInt32(/*C=*/0)), 1958 CGF.MakeNaturalAlignAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 1959 CGF.FinishFunction(); 1960 return TaskEntry; 1961 } 1962 1963 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 1964 SourceLocation Loc, 1965 QualType KmpInt32Ty, 1966 QualType KmpTaskTWithPrivatesPtrQTy, 1967 QualType KmpTaskTWithPrivatesQTy) { 1968 auto &C = CGM.getContext(); 1969 FunctionArgList Args; 1970 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); 
1971 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, 1972 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); 1973 Args.push_back(&GtidArg); 1974 Args.push_back(&TaskTypeArg); 1975 FunctionType::ExtInfo Info; 1976 auto &DestructorFnInfo = 1977 CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info, 1978 /*isVariadic=*/false); 1979 auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo); 1980 auto *DestructorFn = 1981 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 1982 ".omp_task_destructor.", &CGM.getModule()); 1983 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, DestructorFnInfo, DestructorFn); 1984 CodeGenFunction CGF(CGM); 1985 CGF.disableDebugInfo(); 1986 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 1987 Args); 1988 1989 auto *TaskTypeArgAddr = CGF.Builder.CreateAlignedLoad( 1990 CGF.GetAddrOfLocalVar(&TaskTypeArg), CGM.PointerAlignInBytes); 1991 LValue Base = 1992 CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy); 1993 auto *KmpTaskTWithPrivatesQTyRD = 1994 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 1995 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 1996 Base = CGF.EmitLValueForField(Base, *FI); 1997 for (auto *Field : 1998 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 1999 if (auto DtorKind = Field->getType().isDestructedType()) { 2000 auto FieldLValue = CGF.EmitLValueForField(Base, Field); 2001 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 2002 } 2003 } 2004 CGF.FinishFunction(); 2005 return DestructorFn; 2006 } 2007 2008 /// \brief Emit a privates mapping function for correct handling of private and 2009 /// firstprivate variables. 2010 /// \code 2011 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 2012 /// **noalias priv1,..., <tyn> **noalias privn) { 2013 /// *priv1 = &.privates.priv1; 2014 /// ...; 2015 /// *privn = &.privates.privn; 2016 /// } 2017 /// \endcode 2018 static llvm::Value * 2019 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 2020 const ArrayRef<const Expr *> PrivateVars, 2021 const ArrayRef<const Expr *> FirstprivateVars, 2022 QualType PrivatesQTy, 2023 const ArrayRef<PrivateDataTy> Privates) { 2024 auto &C = CGM.getContext(); 2025 FunctionArgList Args; 2026 ImplicitParamDecl TaskPrivatesArg( 2027 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 2028 C.getPointerType(PrivatesQTy).withConst().withRestrict()); 2029 Args.push_back(&TaskPrivatesArg); 2030 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 2031 unsigned Counter = 1; 2032 for (auto *E: PrivateVars) { 2033 Args.push_back(ImplicitParamDecl::Create( 2034 C, /*DC=*/nullptr, Loc, 2035 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 2036 .withConst() 2037 .withRestrict())); 2038 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2039 PrivateVarsPos[VD] = Counter; 2040 ++Counter; 2041 } 2042 for (auto *E : FirstprivateVars) { 2043 Args.push_back(ImplicitParamDecl::Create( 2044 C, /*DC=*/nullptr, Loc, 2045 /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) 2046 .withConst() 2047 .withRestrict())); 2048 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2049 PrivateVarsPos[VD] = Counter; 2050 ++Counter; 2051 } 2052 FunctionType::ExtInfo Info; 2053 auto &TaskPrivatesMapFnInfo = 2054 CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info, 2055 /*isVariadic=*/false); 2056 auto *TaskPrivatesMapTy = 2057 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 2058 auto *TaskPrivatesMap = llvm::Function::Create( 2059 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, 2060 ".omp_task_privates_map.", &CGM.getModule()); 2061 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskPrivatesMapFnInfo, 
2062 TaskPrivatesMap); 2063 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 2064 CodeGenFunction CGF(CGM); 2065 CGF.disableDebugInfo(); 2066 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 2067 TaskPrivatesMapFnInfo, Args); 2068 2069 // *privi = &.privates.privi; 2070 auto *TaskPrivatesArgAddr = CGF.Builder.CreateAlignedLoad( 2071 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), CGM.PointerAlignInBytes); 2072 LValue Base = 2073 CGF.MakeNaturalAlignAddrLValue(TaskPrivatesArgAddr, PrivatesQTy); 2074 auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 2075 Counter = 0; 2076 for (auto *Field : PrivatesQTyRD->fields()) { 2077 auto FieldLVal = CGF.EmitLValueForField(Base, Field); 2078 auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 2079 auto RefLVal = CGF.MakeNaturalAlignAddrLValue(CGF.GetAddrOfLocalVar(VD), 2080 VD->getType()); 2081 auto RefLoadRVal = CGF.EmitLoadOfLValue(RefLVal, Loc); 2082 CGF.EmitStoreOfScalar( 2083 FieldLVal.getAddress(), 2084 CGF.MakeNaturalAlignAddrLValue(RefLoadRVal.getScalarVal(), 2085 RefLVal.getType()->getPointeeType())); 2086 ++Counter; 2087 } 2088 CGF.FinishFunction(); 2089 return TaskPrivatesMap; 2090 } 2091 2092 static int array_pod_sort_comparator(const PrivateDataTy *P1, 2093 const PrivateDataTy *P2) { 2094 return P1->first < P2->first ? 1 : (P2->first < P1->first ? 
-1 : 0); 2095 } 2096 2097 void CGOpenMPRuntime::emitTaskCall( 2098 CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, 2099 bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final, 2100 llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds, 2101 const Expr *IfCond, ArrayRef<const Expr *> PrivateVars, 2102 ArrayRef<const Expr *> PrivateCopies, 2103 ArrayRef<const Expr *> FirstprivateVars, 2104 ArrayRef<const Expr *> FirstprivateCopies, 2105 ArrayRef<const Expr *> FirstprivateInits, 2106 ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) { 2107 auto &C = CGM.getContext(); 2108 llvm::SmallVector<PrivateDataTy, 8> Privates; 2109 // Aggregate privates and sort them by the alignment. 2110 auto I = PrivateCopies.begin(); 2111 for (auto *E : PrivateVars) { 2112 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2113 Privates.push_back(std::make_pair( 2114 C.getTypeAlignInChars(VD->getType()), 2115 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 2116 /*PrivateElemInit=*/nullptr))); 2117 ++I; 2118 } 2119 I = FirstprivateCopies.begin(); 2120 auto IElemInitRef = FirstprivateInits.begin(); 2121 for (auto *E : FirstprivateVars) { 2122 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2123 Privates.push_back(std::make_pair( 2124 C.getTypeAlignInChars(VD->getType()), 2125 PrivateHelpersTy( 2126 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 2127 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())))); 2128 ++I, ++IElemInitRef; 2129 } 2130 llvm::array_pod_sort(Privates.begin(), Privates.end(), 2131 array_pod_sort_comparator); 2132 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2133 // Build type kmp_routine_entry_t (if not built yet). 2134 emitKmpRoutineEntryT(KmpInt32Ty); 2135 // Build type kmp_task_t (if not built yet). 
2136 if (KmpTaskTQTy.isNull()) { 2137 KmpTaskTQTy = C.getRecordType( 2138 createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy)); 2139 } 2140 auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 2141 // Build particular struct kmp_task_t for the given task. 2142 auto *KmpTaskTWithPrivatesQTyRD = 2143 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 2144 auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 2145 QualType KmpTaskTWithPrivatesPtrQTy = 2146 C.getPointerType(KmpTaskTWithPrivatesQTy); 2147 auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 2148 auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo(); 2149 auto KmpTaskTWithPrivatesTySize = 2150 CGM.getSize(C.getTypeSizeInChars(KmpTaskTWithPrivatesQTy)); 2151 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 2152 2153 // Emit initial values for private copies (if any). 2154 llvm::Value *TaskPrivatesMap = nullptr; 2155 auto *TaskPrivatesMapTy = 2156 std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(), 2157 3) 2158 ->getType(); 2159 if (!Privates.empty()) { 2160 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 2161 TaskPrivatesMap = emitTaskPrivateMappingFunction( 2162 CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates); 2163 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2164 TaskPrivatesMap, TaskPrivatesMapTy); 2165 } else { 2166 TaskPrivatesMap = llvm::ConstantPointerNull::get( 2167 cast<llvm::PointerType>(TaskPrivatesMapTy)); 2168 } 2169 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 2170 // kmp_task_t *tt); 2171 auto *TaskEntry = emitProxyTaskFunction( 2172 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy, 2173 KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap); 2174 2175 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 2176 // kmp_int32 flags, size_t 
sizeof_kmp_task_t, size_t sizeof_shareds, 2177 // kmp_routine_entry_t *task_entry); 2178 // Task flags. Format is taken from 2179 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h, 2180 // description of kmp_tasking_flags struct. 2181 const unsigned TiedFlag = 0x1; 2182 const unsigned FinalFlag = 0x2; 2183 unsigned Flags = Tied ? TiedFlag : 0; 2184 auto *TaskFlags = 2185 Final.getPointer() 2186 ? CGF.Builder.CreateSelect(Final.getPointer(), 2187 CGF.Builder.getInt32(FinalFlag), 2188 CGF.Builder.getInt32(/*C=*/0)) 2189 : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0); 2190 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 2191 auto SharedsSize = C.getTypeSizeInChars(SharedsTy); 2192 llvm::Value *AllocArgs[] = { 2193 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags, 2194 KmpTaskTWithPrivatesTySize, CGM.getSize(SharedsSize), 2195 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry, 2196 KmpRoutineEntryPtrTy)}; 2197 auto *NewTask = CGF.EmitRuntimeCall( 2198 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 2199 auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2200 NewTask, KmpTaskTWithPrivatesPtrTy); 2201 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 2202 KmpTaskTWithPrivatesQTy); 2203 LValue TDBase = 2204 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 2205 // Fill the data in the resulting kmp_task_t record. 2206 // Copy shareds if there are any. 2207 llvm::Value *KmpTaskSharedsPtr = nullptr; 2208 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 2209 KmpTaskSharedsPtr = CGF.EmitLoadOfScalar( 2210 CGF.EmitLValueForField( 2211 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)), 2212 Loc); 2213 CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy); 2214 } 2215 // Emit initial values for private copies (if any). 
2216 bool NeedsCleanup = false; 2217 if (!Privates.empty()) { 2218 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 2219 auto PrivatesBase = CGF.EmitLValueForField(Base, *FI); 2220 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 2221 LValue SharedsBase; 2222 if (!FirstprivateVars.empty()) { 2223 SharedsBase = CGF.MakeNaturalAlignAddrLValue( 2224 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2225 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 2226 SharedsTy); 2227 } 2228 CodeGenFunction::CGCapturedStmtInfo CapturesInfo( 2229 cast<CapturedStmt>(*D.getAssociatedStmt())); 2230 for (auto &&Pair : Privates) { 2231 auto *VD = Pair.second.PrivateCopy; 2232 auto *Init = VD->getAnyInitializer(); 2233 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 2234 if (Init) { 2235 if (auto *Elem = Pair.second.PrivateElemInit) { 2236 auto *OriginalVD = Pair.second.Original; 2237 auto *SharedField = CapturesInfo.lookup(OriginalVD); 2238 auto SharedRefLValue = 2239 CGF.EmitLValueForField(SharedsBase, SharedField); 2240 QualType Type = OriginalVD->getType(); 2241 if (Type->isArrayType()) { 2242 // Initialize firstprivate array. 2243 if (!isa<CXXConstructExpr>(Init) || 2244 CGF.isTrivialInitializer(Init)) { 2245 // Perform simple memcpy. 2246 CGF.EmitAggregateAssign(PrivateLValue.getAddress(), 2247 SharedRefLValue.getAddress(), Type); 2248 } else { 2249 // Initialize firstprivate array using element-by-element 2250 // intialization. 2251 CGF.EmitOMPAggregateAssign( 2252 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), 2253 Type, [&CGF, Elem, Init, &CapturesInfo]( 2254 llvm::Value *DestElement, llvm::Value *SrcElement) { 2255 // Clean up any temporaries needed by the initialization. 2256 CodeGenFunction::OMPPrivateScope InitScope(CGF); 2257 InitScope.addPrivate(Elem, [SrcElement]() -> llvm::Value *{ 2258 return SrcElement; 2259 }); 2260 (void)InitScope.Privatize(); 2261 // Emit initialization for single element. 
2262 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 2263 CGF, &CapturesInfo); 2264 CGF.EmitAnyExprToMem(Init, DestElement, 2265 Init->getType().getQualifiers(), 2266 /*IsInitializer=*/false); 2267 }); 2268 } 2269 } else { 2270 CodeGenFunction::OMPPrivateScope InitScope(CGF); 2271 InitScope.addPrivate(Elem, [SharedRefLValue]() -> llvm::Value *{ 2272 return SharedRefLValue.getAddress(); 2273 }); 2274 (void)InitScope.Privatize(); 2275 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 2276 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 2277 /*capturedByInit=*/false); 2278 } 2279 } else { 2280 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 2281 } 2282 } 2283 NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType(); 2284 ++FI; 2285 } 2286 } 2287 // Provide pointer to function with destructors for privates. 2288 llvm::Value *DestructorFn = 2289 NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty, 2290 KmpTaskTWithPrivatesPtrQTy, 2291 KmpTaskTWithPrivatesQTy) 2292 : llvm::ConstantPointerNull::get( 2293 cast<llvm::PointerType>(KmpRoutineEntryPtrTy)); 2294 LValue Destructor = CGF.EmitLValueForField( 2295 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors)); 2296 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2297 DestructorFn, KmpRoutineEntryPtrTy), 2298 Destructor); 2299 2300 // Process list of dependences. 2301 llvm::Value *DependInfo = nullptr; 2302 unsigned DependencesNumber = Dependences.size(); 2303 if (!Dependences.empty()) { 2304 // Dependence kind for RTL. 
2305 enum RTLDependenceKindTy { DepIn = 1, DepOut = 2, DepInOut = 3 }; 2306 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 2307 RecordDecl *KmpDependInfoRD; 2308 QualType FlagsTy = C.getIntTypeForBitwidth( 2309 C.toBits(C.getTypeSizeInChars(C.BoolTy)), /*Signed=*/false); 2310 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 2311 if (KmpDependInfoTy.isNull()) { 2312 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 2313 KmpDependInfoRD->startDefinition(); 2314 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 2315 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 2316 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 2317 KmpDependInfoRD->completeDefinition(); 2318 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 2319 } else { 2320 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 2321 } 2322 // Define type kmp_depend_info[<Dependences.size()>]; 2323 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 2324 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, Dependences.size()), 2325 ArrayType::Normal, /*IndexTypeQuals=*/0); 2326 // kmp_depend_info[<Dependences.size()>] deps; 2327 DependInfo = CGF.CreateMemTemp(KmpDependInfoArrayTy); 2328 for (unsigned i = 0; i < DependencesNumber; ++i) { 2329 auto Addr = CGF.EmitLValue(Dependences[i].second); 2330 auto *Size = llvm::ConstantInt::get( 2331 CGF.SizeTy, 2332 C.getTypeSizeInChars(Dependences[i].second->getType()).getQuantity()); 2333 auto Base = CGF.MakeNaturalAlignAddrLValue( 2334 CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, DependInfo, i), 2335 KmpDependInfoTy); 2336 // deps[i].base_addr = &<Dependences[i].second>; 2337 auto BaseAddrLVal = CGF.EmitLValueForField( 2338 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 2339 CGF.EmitStoreOfScalar( 2340 CGF.Builder.CreatePtrToInt(Addr.getAddress(), CGF.IntPtrTy), 2341 BaseAddrLVal); 2342 // deps[i].len = sizeof(<Dependences[i].second>); 2343 auto LenLVal = CGF.EmitLValueForField( 2344 Base, 
*std::next(KmpDependInfoRD->field_begin(), Len)); 2345 CGF.EmitStoreOfScalar(Size, LenLVal); 2346 // deps[i].flags = <Dependences[i].first>; 2347 RTLDependenceKindTy DepKind; 2348 switch (Dependences[i].first) { 2349 case OMPC_DEPEND_in: 2350 DepKind = DepIn; 2351 break; 2352 case OMPC_DEPEND_out: 2353 DepKind = DepOut; 2354 break; 2355 case OMPC_DEPEND_inout: 2356 DepKind = DepInOut; 2357 break; 2358 case OMPC_DEPEND_unknown: 2359 llvm_unreachable("Unknown task dependence type"); 2360 } 2361 auto FlagsLVal = CGF.EmitLValueForField( 2362 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 2363 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 2364 FlagsLVal); 2365 } 2366 DependInfo = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2367 CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, DependInfo, 0), 2368 CGF.VoidPtrTy); 2369 } 2370 2371 // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() 2372 // libcall. 2373 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2374 // *new_task); 2375 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2376 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2377 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 2378 // list is not empty 2379 auto *ThreadID = getThreadID(CGF, Loc); 2380 auto *UpLoc = emitUpdateLocation(CGF, Loc); 2381 llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask}; 2382 llvm::Value *DepTaskArgs[] = { 2383 UpLoc, 2384 ThreadID, 2385 NewTask, 2386 DependInfo ? CGF.Builder.getInt32(DependencesNumber) : nullptr, 2387 DependInfo, 2388 DependInfo ? CGF.Builder.getInt32(0) : nullptr, 2389 DependInfo ? llvm::ConstantPointerNull::get(CGF.VoidPtrTy) : nullptr}; 2390 auto &&ThenCodeGen = [this, DependInfo, &TaskArgs, 2391 &DepTaskArgs](CodeGenFunction &CGF) { 2392 // TODO: add check for untied tasks. 2393 CGF.EmitRuntimeCall( 2394 createRuntimeFunction(DependInfo ? 
OMPRTL__kmpc_omp_task_with_deps 2395 : OMPRTL__kmpc_omp_task), 2396 DependInfo ? makeArrayRef(DepTaskArgs) : makeArrayRef(TaskArgs)); 2397 }; 2398 typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value> 2399 IfCallEndCleanup; 2400 llvm::Value *DepWaitTaskArgs[] = { 2401 UpLoc, 2402 ThreadID, 2403 DependInfo ? CGF.Builder.getInt32(DependencesNumber) : nullptr, 2404 DependInfo, 2405 DependInfo ? CGF.Builder.getInt32(0) : nullptr, 2406 DependInfo ? llvm::ConstantPointerNull::get(CGF.VoidPtrTy) : nullptr}; 2407 auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 2408 DependInfo, &DepWaitTaskArgs](CodeGenFunction &CGF) { 2409 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 2410 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2411 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 2412 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 2413 // is specified. 2414 if (DependInfo) 2415 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), 2416 DepWaitTaskArgs); 2417 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 2418 // kmp_task_t *new_task); 2419 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), 2420 TaskArgs); 2421 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 2422 // kmp_task_t *new_task); 2423 CGF.EHStack.pushCleanup<IfCallEndCleanup>( 2424 NormalAndEHCleanup, 2425 createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), 2426 llvm::makeArrayRef(TaskArgs)); 2427 2428 // Call proxy_task_entry(gtid, new_task); 2429 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 2430 CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); 2431 }; 2432 if (IfCond) { 2433 emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 2434 } else { 2435 CodeGenFunction::RunCleanupsScope Scope(CGF); 2436 ThenCodeGen(CGF); 2437 } 2438 } 2439

/// \brief Emit the internal helper
/// \code
///   void .omp.reduction.reduction_func(void *LHSArg, void *RHSArg);
/// \endcode
/// which applies every reduction operation to the variables reachable through
/// its two arguments.
///
/// Both arguments are cast to \a ArgsType and indexed as lists of pointers:
/// slot <i> of the first list is bound to the variable referenced by
/// LHSExprs[i], slot <i> of the second list to the variable referenced by
/// RHSExprs[i]. With those bindings in place each expression in
/// \a ReductionOps is emitted and its result discarded.
///
/// \param CGM Module the helper is added to.
/// \param ArgsType LLVM type both 'void *' arguments are cast to.
/// \param LHSExprs DeclRefExprs naming the left-hand reduction variables.
/// \param RHSExprs DeclRefExprs naming the right-hand reduction variables.
/// \param ReductionOps Combiner expressions, one per reduction item.
/// \return The generated function (internal linkage).
static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
                                          llvm::Type *ArgsType,
                                          ArrayRef<const Expr *> LHSExprs,
                                          ArrayRef<const Expr *> RHSExprs,
                                          ArrayRef<const Expr *> ReductionOps) {
  auto &Ctx = CGM.getContext();

  // Signature: void reduction_func(void *LHSArg, void *RHSArg);
  ImplicitParamDecl LHSArg(Ctx, /*DC=*/nullptr, SourceLocation(),
                           /*Id=*/nullptr, Ctx.VoidPtrTy);
  ImplicitParamDecl RHSArg(Ctx, /*DC=*/nullptr, SourceLocation(),
                           /*Id=*/nullptr, Ctx.VoidPtrTy);
  FunctionArgList Params;
  Params.push_back(&LHSArg);
  Params.push_back(&RHSArg);
  FunctionType::ExtInfo DefaultExtInfo;
  auto &FnInfo = CGM.getTypes().arrangeFreeFunctionDeclaration(
      Ctx.VoidTy, Params, DefaultExtInfo, /*isVariadic=*/false);
  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  auto *ReduceFn = llvm::Function::Create(
      FnTy, llvm::GlobalValue::InternalLinkage,
      ".omp.reduction.reduction_func", &CGM.getModule());
  CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, FnInfo, ReduceFn);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, ReduceFn, FnInfo, Params);

  // Reload an incoming 'void *' parameter and view it as the pointer list:
  //   Dst = (void*[n])(LHSArg);
  //   Src = (void*[n])(RHSArg);
  auto &&LoadListArg = [&CGF, ArgsType](ImplicitParamDecl &Param)
      -> llvm::Value * {
    auto *RawList = CGF.Builder.CreateAlignedLoad(
        CGF.GetAddrOfLocalVar(&Param), CGF.PointerAlignInBytes);
    return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(RawList, ArgsType);
  };
  auto *LHSList = LoadListArg(LHSArg);
  auto *RHSList = LoadListArg(RHSArg);

  // Fetch slot Idx of a list and cast the loaded pointer to a pointer to the
  // type of the expression Ref.
  auto &&LoadTypedSlot = [&CGF, &CGM, &Ctx](llvm::Value *List, unsigned Idx,
                                            const Expr *Ref) -> llvm::Value * {
    auto *Slot = CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, List, Idx);
    auto *RawAddr =
        CGF.Builder.CreateAlignedLoad(Slot, CGM.PointerAlignInBytes);
    return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        RawAddr, CGF.ConvertTypeForMem(Ctx.getPointerType(Ref->getType())));
  };

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  for (unsigned Idx = 0, End = ReductionOps.size(); Idx != End; ++Idx) {
    // The right-hand variable is registered before the left-hand one.
    Scope.addPrivate(
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[Idx])->getDecl()),
        [&]() -> llvm::Value * {
          return LoadTypedSlot(RHSList, Idx, RHSExprs[Idx]);
        });
    Scope.addPrivate(
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[Idx])->getDecl()),
        [&]() -> llvm::Value * {
          return LoadTypedSlot(LHSList, Idx, LHSExprs[Idx]);
        });
  }
  Scope.Privatize();
  for (auto *RedOp : ReductionOps)
    CGF.EmitIgnoredExpr(RedOp);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return ReduceFn;
}

/// \brief Emit the code that combines the reduction variables.
///
/// \param CGF Function being generated.
/// \param Loc Source location used for the runtime calls.
/// \param LHSExprs Left-hand reduction variables (DeclRefExprs).
/// \param RHSExprs Right-hand reduction variables; their addresses are
/// stored into the list handed to the runtime.
/// \param ReductionOps Combiner expressions, one per reduction item.
/// \param WithNowait Selects the '_nowait' flavour of the runtime entries.
/// \param SimpleReduction If true, only the plain combiner expressions are
/// emitted and no runtime call is generated.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    bool WithNowait, bool SimpleReduction) {
  // Shape of the generated code:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // With SimpleReduction set, only this part is produced:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  auto &Ctx = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope SimpleScope(CGF);
    for (auto *RedOp : ReductionOps)
      CGF.EmitIgnoredExpr(RedOp);
    return;
  }

  // 1. Build the list of reduction variables:
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  QualType RedListTy = Ctx.getConstantArrayType(
      Ctx.VoidPtrTy,
      llvm::APInt(/*unsigned int numBits=*/32, RHSExprs.size()),
      ArrayType::Normal, /*IndexTypeQuals=*/0);
  auto *RedList = CGF.CreateMemTemp(RedListTy, ".omp.reduction.red_list");
  for (unsigned Idx = 0, End = RHSExprs.size(); Idx != End; ++Idx) {
    auto *Slot = CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, RedList, Idx);
    auto *VarAddr = CGF.EmitLValue(RHSExprs[Idx]).getAddress();
    auto *VarAddrAsVoidPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        VarAddr, CGF.VoidPtrTy);
    CGF.Builder.CreateAlignedStore(VarAddrAsVoidPtr, Slot,
                                   CGM.PointerAlignInBytes);
  }

  // 2. Emit reduce_func().
  llvm::Type *RedListPtrTy = CGF.ConvertTypeForMem(RedListTy)->getPointerTo();
  auto *ReduceFn = emitReductionFunction(CGM, RedListPtrTy, LHSExprs, RHSExprs,
                                         ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  auto *Lock = getCriticalRegionLock(".reduction");

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  auto *IdentTLoc = emitUpdateLocation(
      CGF, Loc,
      static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
  auto *ThreadId = getThreadID(CGF, Loc);
  auto *RedListSize = llvm::ConstantInt::get(
      CGM.SizeTy, Ctx.getTypeSizeInChars(RedListTy).getQuantity());
  auto *RedListAsVoidPtr =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(RedList, CGF.VoidPtrTy);
  llvm::Value *ReduceArgs[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      RedListSize,                           // size_type sizeof(RedList)
      RedListAsVoidPtr,                      // void *RedList
      ReduceFn,                              // void (*) (void *, void *) <reduce_func>
      Lock                                   // kmp_critical_name *&<lock>
  };
  auto *ReduceRTLFn = createRuntimeFunction(
      WithNowait ? OMPRTL__kmpc_reduce_nowait : OMPRTL__kmpc_reduce);
  auto Res = CGF.EmitRuntimeCall(ReduceRTLFn, ReduceArgs);

  // 5. Build switch(res)
  auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // Arguments of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); the same
  // triple is used by both switch cases.
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  typedef CallEndCleanup<std::extent<decltype(EndArgs)>::value>
      EndReduceCleanup;

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  {
    CodeGenFunction::RunCleanupsScope Case1Scope(CGF);
    // The end-of-reduction call is registered as a cleanup, so it is emitted
    // when Case1Scope is left.
    CGF.EHStack.pushCleanup<EndReduceCleanup>(
        NormalAndEHCleanup,
        createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                         : OMPRTL__kmpc_end_reduce),
        llvm::makeArrayRef(EndArgs));
    for (auto *RedOp : ReductionOps)
      CGF.EmitIgnoredExpr(RedOp);
  }

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  {
    CodeGenFunction::RunCleanupsScope Case2Scope(CGF);
    if (!WithNowait) {
      // Only the blocking flavour ends with
      // __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
      CGF.EHStack.pushCleanup<EndReduceCleanup>(
          NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_reduce),
          llvm::makeArrayRef(EndArgs));
    }
    for (unsigned Idx = 0, End = ReductionOps.size(); Idx != End; ++Idx) {
      const Expr *RedOp = ReductionOps[Idx];

      // A combiner of the form 'x = <update>' is a candidate for an atomic
      // update; split it into its two sides.
      const Expr *XExpr = nullptr;
      const Expr *UpExpr = nullptr;
      auto *Assign = dyn_cast<BinaryOperator>(RedOp);
      if (Assign && Assign->getOpcode() == BO_Assign) {
        XExpr = Assign->getLHS();
        UpExpr = Assign->getRHS();
      }

      // Try to recognise the update as a simple binary operation.
      const Expr *EExpr = nullptr;
      BinaryOperatorKind UpdateOpc = BO_Comma;
      if (UpExpr) {
        const Expr *Analyzed = UpExpr;
        // For a conditional operator (min/max reductions) the condition is
        // the expression that gets analysed.
        if (auto *CondOp = dyn_cast<AbstractConditionalOperator>(
                Analyzed->IgnoreParenImpCasts()))
          Analyzed = CondOp->getCond();
        if (auto *BinOp =
                dyn_cast<BinaryOperator>(Analyzed->IgnoreParenImpCasts())) {
          EExpr = BinOp->getRHS();
          UpdateOpc = BinOp->getOpcode();
        }
      }

      if (!XExpr) {
        // Not an assignment: emit as a critical region.
        emitCriticalRegion(CGF, ".atomic_reduction",
                           [RedOp](CodeGenFunction &CGF) {
                             CGF.EmitIgnoredExpr(RedOp);
                           },
                           Loc);
        continue;
      }

      auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[Idx])->getDecl());
      LValue X = CGF.EmitLValue(XExpr);
      RValue Operand;
      if (EExpr)
        Operand = CGF.EmitAnyExpr(EExpr);
      CGF.EmitOMPAtomicSimpleUpdateExpr(
          X, Operand, UpdateOpc, /*IsXLHSInRHSPart=*/true, llvm::Monotonic,
          Loc, [&CGF, UpExpr, LHSVar](RValue XRValue) {
            // Rebind the left-hand variable to a temporary holding the value
            // handed in, then evaluate the update expression against it.
            CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
            PrivateScope.addPrivate(
                LHSVar, [&CGF, LHSVar, XRValue]() -> llvm::Value * {
                  auto *Temp = CGF.CreateMemTemp(LHSVar->getType());
                  CGF.EmitStoreThroughLValue(
                      XRValue, CGF.MakeNaturalAlignAddrLValue(
                                   Temp, LHSVar->getType()));
                  return Temp;
                });
            (void)PrivateScope.Privatize();
            return CGF.EmitAnyExpr(UpExpr);
          });
    }
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

2728 2729 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 2730 SourceLocation Loc) { 2731 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 2732 // global_tid); 2733 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2734 // Ignore return result until untied tasks are supported.
2735 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 2736 } 2737 2738 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 2739 OpenMPDirectiveKind InnerKind, 2740 const RegionCodeGenTy &CodeGen) { 2741 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind); 2742 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 2743 } 2744 2745 namespace { 2746 enum RTCancelKind { 2747 CancelNoreq = 0, 2748 CancelParallel = 1, 2749 CancelLoop = 2, 2750 CancelSections = 3, 2751 CancelTaskgroup = 4 2752 }; 2753 } 2754 2755 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 2756 RTCancelKind CancelKind = CancelNoreq; 2757 if (CancelRegion == OMPD_parallel) 2758 CancelKind = CancelParallel; 2759 else if (CancelRegion == OMPD_for) 2760 CancelKind = CancelLoop; 2761 else if (CancelRegion == OMPD_sections) 2762 CancelKind = CancelSections; 2763 else { 2764 assert(CancelRegion == OMPD_taskgroup); 2765 CancelKind = CancelTaskgroup; 2766 } 2767 return CancelKind; 2768 } 2769 2770 void CGOpenMPRuntime::emitCancellationPointCall( 2771 CodeGenFunction &CGF, SourceLocation Loc, 2772 OpenMPDirectiveKind CancelRegion) { 2773 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2774 // global_tid, kmp_int32 cncl_kind); 2775 if (auto *OMPRegionInfo = 2776 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 2777 auto CancelDest = 2778 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2779 if (CancelDest.isValid()) { 2780 llvm::Value *Args[] = { 2781 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2782 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 2783 // Ignore return result until untied tasks are supported. 
2784 auto *Result = CGF.EmitRuntimeCall( 2785 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 2786 // if (__kmpc_cancellationpoint()) { 2787 // __kmpc_cancel_barrier(); 2788 // exit from construct; 2789 // } 2790 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2791 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 2792 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 2793 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2794 CGF.EmitBlock(ExitBB); 2795 // __kmpc_cancel_barrier(); 2796 emitBarrierCall(CGF, Loc, OMPD_unknown, /*CheckForCancel=*/false); 2797 // exit from construct; 2798 CGF.EmitBranchThroughCleanup(CancelDest); 2799 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2800 } 2801 } 2802 } 2803 2804 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 2805 OpenMPDirectiveKind CancelRegion) { 2806 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2807 // kmp_int32 cncl_kind); 2808 if (auto *OMPRegionInfo = 2809 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 2810 auto CancelDest = 2811 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2812 if (CancelDest.isValid()) { 2813 llvm::Value *Args[] = { 2814 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2815 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 2816 // Ignore return result until untied tasks are supported. 
2817 auto *Result = 2818 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 2819 // if (__kmpc_cancel()) { 2820 // __kmpc_cancel_barrier(); 2821 // exit from construct; 2822 // } 2823 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2824 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 2825 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 2826 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2827 CGF.EmitBlock(ExitBB); 2828 // __kmpc_cancel_barrier(); 2829 emitBarrierCall(CGF, Loc, OMPD_unknown, /*CheckForCancel=*/false); 2830 // exit from construct; 2831 CGF.EmitBranchThroughCleanup(CancelDest); 2832 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2833 } 2834 } 2835 } 2836 2837