1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This provides a class for OpenMP runtime code generation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGOpenMPRuntime.h" 15 #include "CodeGenFunction.h" 16 #include "CGCleanup.h" 17 #include "clang/AST/Decl.h" 18 #include "clang/AST/StmtOpenMP.h" 19 #include "llvm/ADT/ArrayRef.h" 20 #include "llvm/IR/CallSite.h" 21 #include "llvm/IR/DerivedTypes.h" 22 #include "llvm/IR/GlobalValue.h" 23 #include "llvm/IR/Value.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include <cassert> 26 27 using namespace clang; 28 using namespace CodeGen; 29 30 namespace { 31 /// \brief Base class for handling code generation inside OpenMP regions. 32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 33 public: 34 /// \brief Kinds of OpenMP regions used in codegen. 35 enum CGOpenMPRegionKind { 36 /// \brief Region with outlined function for standalone 'parallel' 37 /// directive. 38 ParallelOutlinedRegion, 39 /// \brief Region with outlined function for standalone 'task' directive. 40 TaskOutlinedRegion, 41 /// \brief Region for constructs that do not require function outlining, 42 /// like 'for', 'sections', 'atomic' etc. directives. 43 InlinedRegion, 44 }; 45 46 CGOpenMPRegionInfo(const CapturedStmt &CS, 47 const CGOpenMPRegionKind RegionKind, 48 const RegionCodeGenTy &CodeGen) 49 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 50 CodeGen(CodeGen) {} 51 52 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 53 const RegionCodeGenTy &CodeGen) 54 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), 55 CodeGen(CodeGen) {} 56 57 /// \brief Get a variable or parameter for storing global thread id 58 /// inside OpenMP construct. 59 virtual const VarDecl *getThreadIDVariable() const = 0; 60 61 /// \brief Emit the captured statement body. 62 virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 63 64 /// \brief Get an LValue for the current ThreadID variable. 65 /// \return LValue for thread id variable. This LValue always has type int32*. 66 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 67 68 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 69 70 static bool classof(const CGCapturedStmtInfo *Info) { 71 return Info->getKind() == CR_OpenMP; 72 } 73 74 protected: 75 CGOpenMPRegionKind RegionKind; 76 const RegionCodeGenTy &CodeGen; 77 }; 78 79 /// \brief API for captured statement code generation in OpenMP constructs. 80 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo { 81 public: 82 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 83 const RegionCodeGenTy &CodeGen) 84 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen), 85 ThreadIDVar(ThreadIDVar) { 86 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 87 } 88 /// \brief Get a variable or parameter for storing global thread id 89 /// inside OpenMP construct. 90 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 91 92 /// \brief Get the name of the capture helper. 93 StringRef getHelperName() const override { return ".omp_outlined."; } 94 95 static bool classof(const CGCapturedStmtInfo *Info) { 96 return CGOpenMPRegionInfo::classof(Info) && 97 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 98 ParallelOutlinedRegion; 99 } 100 101 private: 102 /// \brief A variable or parameter storing global thread id for OpenMP 103 /// constructs. 104 const VarDecl *ThreadIDVar; 105 }; 106 107 /// \brief API for captured statement code generation in OpenMP constructs. 108 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo { 109 public: 110 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 111 const VarDecl *ThreadIDVar, 112 const RegionCodeGenTy &CodeGen) 113 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen), 114 ThreadIDVar(ThreadIDVar) { 115 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 116 } 117 /// \brief Get a variable or parameter for storing global thread id 118 /// inside OpenMP construct. 119 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 120 121 /// \brief Get an LValue for the current ThreadID variable. 122 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 123 124 /// \brief Get the name of the capture helper. 125 StringRef getHelperName() const override { return ".omp_outlined."; } 126 127 static bool classof(const CGCapturedStmtInfo *Info) { 128 return CGOpenMPRegionInfo::classof(Info) && 129 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 130 TaskOutlinedRegion; 131 } 132 133 private: 134 /// \brief A variable or parameter storing global thread id for OpenMP 135 /// constructs. 136 const VarDecl *ThreadIDVar; 137 }; 138 139 /// \brief API for inlined captured statement code generation in OpenMP 140 /// constructs. 141 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 142 public: 143 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 144 const RegionCodeGenTy &CodeGen) 145 : CGOpenMPRegionInfo(InlinedRegion, CodeGen), OldCSI(OldCSI), 146 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 147 // \brief Retrieve the value of the context parameter. 148 llvm::Value *getContextValue() const override { 149 if (OuterRegionInfo) 150 return OuterRegionInfo->getContextValue(); 151 llvm_unreachable("No context value for inlined OpenMP region"); 152 } 153 virtual void setContextValue(llvm::Value *V) override { 154 if (OuterRegionInfo) { 155 OuterRegionInfo->setContextValue(V); 156 return; 157 } 158 llvm_unreachable("No context value for inlined OpenMP region"); 159 } 160 /// \brief Lookup the captured field decl for a variable. 161 const FieldDecl *lookup(const VarDecl *VD) const override { 162 if (OuterRegionInfo) 163 return OuterRegionInfo->lookup(VD); 164 // If there is no outer outlined region,no need to lookup in a list of 165 // captured variables, we can use the original one. 166 return nullptr; 167 } 168 FieldDecl *getThisFieldDecl() const override { 169 if (OuterRegionInfo) 170 return OuterRegionInfo->getThisFieldDecl(); 171 return nullptr; 172 } 173 /// \brief Get a variable or parameter for storing global thread id 174 /// inside OpenMP construct. 175 const VarDecl *getThreadIDVariable() const override { 176 if (OuterRegionInfo) 177 return OuterRegionInfo->getThreadIDVariable(); 178 return nullptr; 179 } 180 181 /// \brief Get the name of the capture helper. 182 StringRef getHelperName() const override { 183 if (auto *OuterRegionInfo = getOldCSI()) 184 return OuterRegionInfo->getHelperName(); 185 llvm_unreachable("No helper name for inlined OpenMP construct"); 186 } 187 188 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 189 190 static bool classof(const CGCapturedStmtInfo *Info) { 191 return CGOpenMPRegionInfo::classof(Info) && 192 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 193 } 194 195 private: 196 /// \brief CodeGen info about outer OpenMP region. 197 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 198 CGOpenMPRegionInfo *OuterRegionInfo; 199 }; 200 201 /// \brief RAII for emitting code of OpenMP constructs. 202 class InlinedOpenMPRegionRAII { 203 CodeGenFunction &CGF; 204 205 public: 206 /// \brief Constructs region for combined constructs. 207 /// \param CodeGen Code generation sequence for combined directives. Includes 208 /// a list of functions used for code generation of implicitly inlined 209 /// regions. 210 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen) 211 : CGF(CGF) { 212 // Start emission for the construct. 213 CGF.CapturedStmtInfo = 214 new CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, CodeGen); 215 } 216 ~InlinedOpenMPRegionRAII() { 217 // Restore original CapturedStmtInfo only if we're done with code emission. 218 auto *OldCSI = 219 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 220 delete CGF.CapturedStmtInfo; 221 CGF.CapturedStmtInfo = OldCSI; 222 } 223 }; 224 225 } // namespace 226 227 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 228 return CGF.MakeNaturalAlignAddrLValue( 229 CGF.Builder.CreateAlignedLoad( 230 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 231 CGF.PointerAlignInBytes), 232 getThreadIDVariable() 233 ->getType() 234 ->castAs<PointerType>() 235 ->getPointeeType()); 236 } 237 238 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 239 // 1.2.2 OpenMP Language Terminology 240 // Structured block - An executable statement with a single entry at the 241 // top and a single exit at the bottom. 242 // The point of exit cannot be a branch out of the structured block. 243 // longjmp() and throw() must not violate the entry/exit criteria. 244 CGF.EHStack.pushTerminate(); 245 { 246 CodeGenFunction::RunCleanupsScope Scope(CGF); 247 CodeGen(CGF); 248 } 249 CGF.EHStack.popTerminate(); 250 } 251 252 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 253 CodeGenFunction &CGF) { 254 return CGF.MakeNaturalAlignAddrLValue( 255 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 256 getThreadIDVariable()->getType()); 257 } 258 259 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) 260 : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) { 261 IdentTy = llvm::StructType::create( 262 "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, 263 CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, 264 CGM.Int8PtrTy /* psource */, nullptr); 265 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 266 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 267 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 268 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 269 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 270 } 271 272 void CGOpenMPRuntime::clear() { 273 InternalVars.clear(); 274 } 275 276 llvm::Value * 277 CGOpenMPRuntime::emitParallelOutlinedFunction(const OMPExecutableDirective &D, 278 const VarDecl *ThreadIDVar, 279 const RegionCodeGenTy &CodeGen) { 280 assert(ThreadIDVar->getType()->isPointerType() && 281 "thread id variable must be of type kmp_int32 *"); 282 const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 283 CodeGenFunction CGF(CGM, true); 284 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen); 285 CGF.CapturedStmtInfo = &CGInfo; 286 return CGF.GenerateCapturedStmtFunction(*CS); 287 } 288 289 llvm::Value * 290 CGOpenMPRuntime::emitTaskOutlinedFunction(const OMPExecutableDirective &D, 291 const VarDecl *ThreadIDVar, 292 const RegionCodeGenTy &CodeGen) { 293 assert(!ThreadIDVar->getType()->isPointerType() && 294 "thread id variable must be of type kmp_int32 for tasks"); 295 auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 296 CodeGenFunction CGF(CGM, true); 297 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen); 298 CGF.CapturedStmtInfo = &CGInfo; 299 return CGF.GenerateCapturedStmtFunction(*CS); 300 } 301 302 llvm::Value * 303 CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { 304 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); 305 if (!Entry) { 306 if (!DefaultOpenMPPSource) { 307 // Initialize default location for psource field of ident_t structure of 308 // all ident_t objects. Format is ";file;function;line;column;;". 309 // Taken from 310 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c 311 DefaultOpenMPPSource = 312 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;"); 313 DefaultOpenMPPSource = 314 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 315 } 316 auto DefaultOpenMPLocation = new llvm::GlobalVariable( 317 CGM.getModule(), IdentTy, /*isConstant*/ true, 318 llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr); 319 DefaultOpenMPLocation->setUnnamedAddr(true); 320 321 llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true); 322 llvm::Constant *Values[] = {Zero, 323 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 324 Zero, Zero, DefaultOpenMPPSource}; 325 llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values); 326 DefaultOpenMPLocation->setInitializer(Init); 327 OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation; 328 return DefaultOpenMPLocation; 329 } 330 return Entry; 331 } 332 333 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 334 SourceLocation Loc, 335 OpenMPLocationFlags Flags) { 336 // If no debug info is generated - return global default location. 337 if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo || 338 Loc.isInvalid()) 339 return getOrCreateDefaultLocation(Flags); 340 341 assert(CGF.CurFn && "No function in current CodeGenFunction."); 342 343 llvm::Value *LocValue = nullptr; 344 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 345 if (I != OpenMPLocThreadIDMap.end()) 346 LocValue = I->second.DebugLoc; 347 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 348 // GetOpenMPThreadID was called before this routine. 349 if (LocValue == nullptr) { 350 // Generate "ident_t .kmpc_loc.addr;" 351 llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr"); 352 AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy)); 353 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 354 Elem.second.DebugLoc = AI; 355 LocValue = AI; 356 357 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 358 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 359 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 360 llvm::ConstantExpr::getSizeOf(IdentTy), 361 CGM.PointerAlignInBytes); 362 } 363 364 // char **psource = &.kmpc_loc_<flags>.addr.psource; 365 auto *PSource = CGF.Builder.CreateConstInBoundsGEP2_32(IdentTy, LocValue, 0, 366 IdentField_PSource); 367 368 auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 369 if (OMPDebugLoc == nullptr) { 370 SmallString<128> Buffer2; 371 llvm::raw_svector_ostream OS2(Buffer2); 372 // Build debug location 373 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 374 OS2 << ";" << PLoc.getFilename() << ";"; 375 if (const FunctionDecl *FD = 376 dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) { 377 OS2 << FD->getQualifiedNameAsString(); 378 } 379 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 380 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 381 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 382 } 383 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 384 CGF.Builder.CreateStore(OMPDebugLoc, PSource); 385 386 return LocValue; 387 } 388 389 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 390 SourceLocation Loc) { 391 assert(CGF.CurFn && "No function in current CodeGenFunction."); 392 393 llvm::Value *ThreadID = nullptr; 394 // Check whether we've already cached a load of the thread id in this 395 // function. 396 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 397 if (I != OpenMPLocThreadIDMap.end()) { 398 ThreadID = I->second.ThreadID; 399 if (ThreadID != nullptr) 400 return ThreadID; 401 } 402 if (auto OMPRegionInfo = 403 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 404 if (OMPRegionInfo->getThreadIDVariable()) { 405 // Check if this an outlined function with thread id passed as argument. 406 auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 407 ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); 408 // If value loaded in entry block, cache it and use it everywhere in 409 // function. 410 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 411 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 412 Elem.second.ThreadID = ThreadID; 413 } 414 return ThreadID; 415 } 416 } 417 418 // This is not an outlined function region - need to call __kmpc_int32 419 // kmpc_global_thread_num(ident_t *loc). 420 // Generate thread id value and cache this value for use across the 421 // function. 422 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 423 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 424 ThreadID = 425 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 426 emitUpdateLocation(CGF, Loc)); 427 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 428 Elem.second.ThreadID = ThreadID; 429 return ThreadID; 430 } 431 432 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 433 assert(CGF.CurFn && "No function in current CodeGenFunction."); 434 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 435 OpenMPLocThreadIDMap.erase(CGF.CurFn); 436 } 437 438 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 439 return llvm::PointerType::getUnqual(IdentTy); 440 } 441 442 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 443 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 444 } 445 446 llvm::Constant * 447 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { 448 llvm::Constant *RTLFn = nullptr; 449 switch (Function) { 450 case OMPRTL__kmpc_fork_call: { 451 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 452 // microtask, ...); 453 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 454 getKmpc_MicroPointerTy()}; 455 llvm::FunctionType *FnTy = 456 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 457 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 458 break; 459 } 460 case OMPRTL__kmpc_global_thread_num: { 461 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 462 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 463 llvm::FunctionType *FnTy = 464 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 465 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 466 break; 467 } 468 case OMPRTL__kmpc_threadprivate_cached: { 469 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 470 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 471 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 472 CGM.VoidPtrTy, CGM.SizeTy, 473 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 474 llvm::FunctionType *FnTy = 475 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 476 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 477 break; 478 } 479 case OMPRTL__kmpc_critical: { 480 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 481 // kmp_critical_name *crit); 482 llvm::Type *TypeParams[] = { 483 getIdentTyPointerTy(), CGM.Int32Ty, 484 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 485 llvm::FunctionType *FnTy = 486 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 487 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 488 break; 489 } 490 case OMPRTL__kmpc_threadprivate_register: { 491 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 492 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 493 // typedef void *(*kmpc_ctor)(void *); 494 auto KmpcCtorTy = 495 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 496 /*isVarArg*/ false)->getPointerTo(); 497 // typedef void *(*kmpc_cctor)(void *, void *); 498 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 499 auto KmpcCopyCtorTy = 500 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 501 /*isVarArg*/ false)->getPointerTo(); 502 // typedef void (*kmpc_dtor)(void *); 503 auto KmpcDtorTy = 504 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 505 ->getPointerTo(); 506 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 507 KmpcCopyCtorTy, KmpcDtorTy}; 508 auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 509 /*isVarArg*/ false); 510 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 511 break; 512 } 513 case OMPRTL__kmpc_end_critical: { 514 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 515 // kmp_critical_name *crit); 516 llvm::Type *TypeParams[] = { 517 getIdentTyPointerTy(), CGM.Int32Ty, 518 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 519 llvm::FunctionType *FnTy = 520 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 521 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 522 break; 523 } 524 case OMPRTL__kmpc_cancel_barrier: { 525 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 526 // global_tid); 527 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 528 llvm::FunctionType *FnTy = 529 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 530 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 531 break; 532 } 533 case OMPRTL__kmpc_for_static_fini: { 534 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 535 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 536 llvm::FunctionType *FnTy = 537 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 538 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 539 break; 540 } 541 case OMPRTL__kmpc_push_num_threads: { 542 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 543 // kmp_int32 num_threads) 544 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 545 CGM.Int32Ty}; 546 llvm::FunctionType *FnTy = 547 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 548 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 549 break; 550 } 551 case OMPRTL__kmpc_serialized_parallel: { 552 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 553 // global_tid); 554 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 555 llvm::FunctionType *FnTy = 556 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 557 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 558 break; 559 } 560 case OMPRTL__kmpc_end_serialized_parallel: { 561 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 562 // global_tid); 563 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 564 llvm::FunctionType *FnTy = 565 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 566 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 567 break; 568 } 569 case OMPRTL__kmpc_flush: { 570 // Build void __kmpc_flush(ident_t *loc); 571 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 572 llvm::FunctionType *FnTy = 573 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 574 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 575 break; 576 } 577 case OMPRTL__kmpc_master: { 578 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 579 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 580 llvm::FunctionType *FnTy = 581 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 582 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 583 break; 584 } 585 case OMPRTL__kmpc_end_master: { 586 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 587 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 588 llvm::FunctionType *FnTy = 589 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 590 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 591 break; 592 } 593 case OMPRTL__kmpc_omp_taskyield: { 594 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 595 // int end_part); 596 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 597 llvm::FunctionType *FnTy = 598 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 599 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 600 break; 601 } 602 case OMPRTL__kmpc_single: { 603 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 604 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 605 llvm::FunctionType *FnTy = 606 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 607 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 608 break; 609 } 610 case OMPRTL__kmpc_end_single: { 611 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 612 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 613 llvm::FunctionType *FnTy = 614 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 615 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 616 break; 617 } 618 case OMPRTL__kmpc_omp_task_alloc: { 619 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 620 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 621 // kmp_routine_entry_t *task_entry); 622 assert(KmpRoutineEntryPtrTy != nullptr && 623 "Type kmp_routine_entry_t must be created."); 624 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 625 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 626 // Return void * and then cast to particular kmp_task_t type. 627 llvm::FunctionType *FnTy = 628 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 629 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 630 break; 631 } 632 case OMPRTL__kmpc_omp_task: { 633 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 634 // *new_task); 635 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 636 CGM.VoidPtrTy}; 637 llvm::FunctionType *FnTy = 638 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 639 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 640 break; 641 } 642 case OMPRTL__kmpc_copyprivate: { 643 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 644 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 645 // kmp_int32 didit); 646 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 647 auto *CpyFnTy = 648 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 649 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 650 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 651 CGM.Int32Ty}; 652 llvm::FunctionType *FnTy = 653 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 654 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 655 break; 656 } 657 case OMPRTL__kmpc_reduce: { 658 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 659 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 660 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 661 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 662 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 663 /*isVarArg=*/false); 664 llvm::Type *TypeParams[] = { 665 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 666 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 667 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 668 llvm::FunctionType *FnTy = 669 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 670 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 671 break; 672 } 673 case OMPRTL__kmpc_reduce_nowait: { 674 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 675 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 676 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 677 // *lck); 678 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 679 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 680 /*isVarArg=*/false); 681 llvm::Type *TypeParams[] = { 682 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 683 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 684 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 685 llvm::FunctionType *FnTy = 686 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 687 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 688 break; 689 } 690 case OMPRTL__kmpc_end_reduce: { 691 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 692 // kmp_critical_name *lck); 693 llvm::Type *TypeParams[] = { 694 getIdentTyPointerTy(), CGM.Int32Ty, 695 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 696 llvm::FunctionType *FnTy = 697 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 698 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 699 break; 700 } 701 case OMPRTL__kmpc_end_reduce_nowait: { 702 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 703 // kmp_critical_name *lck); 704 llvm::Type *TypeParams[] = { 705 getIdentTyPointerTy(), CGM.Int32Ty, 706 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 707 llvm::FunctionType *FnTy = 708 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 709 RTLFn = 710 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 711 break; 712 } 713 case OMPRTL__kmpc_omp_task_begin_if0: { 714 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 715 // *new_task); 716 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 717 CGM.VoidPtrTy}; 718 llvm::FunctionType *FnTy = 719 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 720 RTLFn = 721 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 722 break; 723 } 724 case OMPRTL__kmpc_omp_task_complete_if0: { 725 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 726 // *new_task); 727 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 728 CGM.VoidPtrTy}; 729 llvm::FunctionType *FnTy = 730 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 731 RTLFn = CGM.CreateRuntimeFunction(FnTy, 732 /*Name=*/"__kmpc_omp_task_complete_if0"); 733 break; 734 } 735 case OMPRTL__kmpc_ordered: { 736 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 737 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 738 llvm::FunctionType *FnTy = 739 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 740 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 741 break; 742 } 743 case OMPRTL__kmpc_end_ordered: { 744 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 745 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 746 llvm::FunctionType *FnTy = 747 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 748 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 749 break; 750 } 751 case OMPRTL__kmpc_omp_taskwait: { 752 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 753 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 754 llvm::FunctionType *FnTy = 755 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 756 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 757 break; 758 } 759 } 760 return RTLFn; 761 } 762 763 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, 764 bool IVSigned) { 765 assert((IVSize == 32 || IVSize == 64) && 766 "IV size is not compatible with the omp runtime"); 767 auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 768 : "__kmpc_for_static_init_4u") 769 : (IVSigned ? "__kmpc_for_static_init_8" 770 : "__kmpc_for_static_init_8u"); 771 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 772 auto PtrTy = llvm::PointerType::getUnqual(ITy); 773 llvm::Type *TypeParams[] = { 774 getIdentTyPointerTy(), // loc 775 CGM.Int32Ty, // tid 776 CGM.Int32Ty, // schedtype 777 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 778 PtrTy, // p_lower 779 PtrTy, // p_upper 780 PtrTy, // p_stride 781 ITy, // incr 782 ITy // chunk 783 }; 784 llvm::FunctionType *FnTy = 785 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 786 return CGM.CreateRuntimeFunction(FnTy, Name); 787 } 788 789 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, 790 bool IVSigned) { 791 assert((IVSize == 32 || IVSize == 64) && 792 "IV size is not compatible with the omp runtime"); 793 auto Name = 794 IVSize == 32 795 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 796 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 797 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 798 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 799 CGM.Int32Ty, // tid 800 CGM.Int32Ty, // schedtype 801 ITy, // lower 802 ITy, // upper 803 ITy, // stride 804 ITy // chunk 805 }; 806 llvm::FunctionType *FnTy = 807 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 808 return CGM.CreateRuntimeFunction(FnTy, Name); 809 } 810 811 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, 812 bool IVSigned) { 813 assert((IVSize == 32 || IVSize == 64) && 814 "IV size is not compatible with the omp runtime"); 815 auto Name = 816 IVSize == 32 817 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 818 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 819 llvm::Type *TypeParams[] = { 820 getIdentTyPointerTy(), // loc 821 CGM.Int32Ty, // tid 822 }; 823 llvm::FunctionType *FnTy = 824 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 825 return CGM.CreateRuntimeFunction(FnTy, Name); 826 } 827 828 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, 829 bool IVSigned) { 830 assert((IVSize == 32 || IVSize == 64) && 831 "IV size is not compatible with the omp runtime"); 832 auto Name = 833 IVSize == 32 834 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 835 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 836 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 837 auto PtrTy = llvm::PointerType::getUnqual(ITy); 838 llvm::Type *TypeParams[] = { 839 getIdentTyPointerTy(), // loc 840 CGM.Int32Ty, // tid 841 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 842 PtrTy, // p_lower 843 PtrTy, // p_upper 844 PtrTy // p_stride 845 }; 846 llvm::FunctionType *FnTy = 847 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 848 return CGM.CreateRuntimeFunction(FnTy, Name); 849 } 850 851 llvm::Constant * 852 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 853 // Lookup the entry, lazily creating it if necessary. 854 return getOrCreateInternalVariable(CGM.Int8PtrPtrTy, 855 Twine(CGM.getMangledName(VD)) + ".cache."); 856 } 857 858 llvm::Value *CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 859 const VarDecl *VD, 860 llvm::Value *VDAddr, 861 SourceLocation Loc) { 862 auto VarTy = VDAddr->getType()->getPointerElementType(); 863 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 864 CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy), 865 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 866 getOrCreateThreadPrivateCache(VD)}; 867 return CGF.EmitRuntimeCall( 868 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args); 869 } 870 871 void CGOpenMPRuntime::emitThreadPrivateVarInit( 872 CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor, 873 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 874 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 875 // library. 876 auto OMPLoc = emitUpdateLocation(CGF, Loc); 877 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 878 OMPLoc); 879 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 880 // to register constructor/destructor for variable. 881 llvm::Value *Args[] = {OMPLoc, 882 CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy), 883 Ctor, CopyCtor, Dtor}; 884 CGF.EmitRuntimeCall( 885 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 886 } 887 888 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 889 const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc, 890 bool PerformInit, CodeGenFunction *CGF) { 891 VD = VD->getDefinition(CGM.getContext()); 892 if (VD && ThreadPrivateWithDefinition.count(VD) == 0) { 893 ThreadPrivateWithDefinition.insert(VD); 894 QualType ASTTy = VD->getType(); 895 896 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 897 auto Init = VD->getAnyInitializer(); 898 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 899 // Generate function that re-emits the declaration's initializer into the 900 // threadprivate copy of the variable VD 901 CodeGenFunction CtorCGF(CGM); 902 FunctionArgList Args; 903 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 904 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 905 Args.push_back(&Dst); 906 907 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( 908 CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(), 909 /*isVariadic=*/false); 910 auto FTy = CGM.getTypes().GetFunctionType(FI); 911 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 912 FTy, ".__kmpc_global_ctor_.", Loc); 913 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 914 Args, SourceLocation()); 915 auto ArgVal = CtorCGF.EmitLoadOfScalar( 916 CtorCGF.GetAddrOfLocalVar(&Dst), 917 /*Volatile=*/false, CGM.PointerAlignInBytes, 918 CGM.getContext().VoidPtrTy, Dst.getLocation()); 919 auto Arg = CtorCGF.Builder.CreatePointerCast( 920 ArgVal, 921 CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy))); 922 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 923 /*IsInitializer=*/true); 924 ArgVal = CtorCGF.EmitLoadOfScalar( 925 CtorCGF.GetAddrOfLocalVar(&Dst), 926 /*Volatile=*/false, CGM.PointerAlignInBytes, 927 CGM.getContext().VoidPtrTy, Dst.getLocation()); 928 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 929 CtorCGF.FinishFunction(); 930 Ctor = Fn; 931 } 932 if (VD->getType().isDestructedType() != QualType::DK_none) { 933 // Generate function that emits destructor call for the threadprivate copy 934 // of the variable VD 935 CodeGenFunction DtorCGF(CGM); 936 FunctionArgList Args; 937 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 938 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 939 Args.push_back(&Dst); 940 941 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( 942 CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(), 943 /*isVariadic=*/false); 944 auto FTy = CGM.getTypes().GetFunctionType(FI); 945 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 946 FTy, ".__kmpc_global_dtor_.", Loc); 947 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 948 SourceLocation()); 949 auto ArgVal = DtorCGF.EmitLoadOfScalar( 950 DtorCGF.GetAddrOfLocalVar(&Dst), 951 /*Volatile=*/false, CGM.PointerAlignInBytes, 952 CGM.getContext().VoidPtrTy, Dst.getLocation()); 953 DtorCGF.emitDestroy(ArgVal, ASTTy, 954 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 955 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 956 DtorCGF.FinishFunction(); 957 Dtor = Fn; 958 } 959 // Do not emit init function if it is not required. 960 if (!Ctor && !Dtor) 961 return nullptr; 962 963 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 964 auto CopyCtorTy = 965 llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 966 /*isVarArg=*/false)->getPointerTo(); 967 // Copying constructor for the threadprivate variable. 968 // Must be NULL - reserved by runtime, but currently it requires that this 969 // parameter is always NULL. Otherwise it fires assertion. 970 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 971 if (Ctor == nullptr) { 972 auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 973 /*isVarArg=*/false)->getPointerTo(); 974 Ctor = llvm::Constant::getNullValue(CtorTy); 975 } 976 if (Dtor == nullptr) { 977 auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 978 /*isVarArg=*/false)->getPointerTo(); 979 Dtor = llvm::Constant::getNullValue(DtorTy); 980 } 981 if (!CGF) { 982 auto InitFunctionTy = 983 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 984 auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( 985 InitFunctionTy, ".__omp_threadprivate_init_."); 986 CodeGenFunction InitCGF(CGM); 987 FunctionArgList ArgList; 988 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 989 CGM.getTypes().arrangeNullaryFunction(), ArgList, 990 Loc); 991 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 992 InitCGF.FinishFunction(); 993 return InitFunction; 994 } 995 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 996 } 997 return nullptr; 998 } 999 1000 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen 1001 /// function. Here is the logic: 1002 /// if (Cond) { 1003 /// ThenGen(); 1004 /// } else { 1005 /// ElseGen(); 1006 /// } 1007 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 1008 const RegionCodeGenTy &ThenGen, 1009 const RegionCodeGenTy &ElseGen) { 1010 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 1011 1012 // If the condition constant folds and can be elided, try to avoid emitting 1013 // the condition and the dead arm of the if/else. 1014 bool CondConstant; 1015 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 1016 CodeGenFunction::RunCleanupsScope Scope(CGF); 1017 if (CondConstant) { 1018 ThenGen(CGF); 1019 } else { 1020 ElseGen(CGF); 1021 } 1022 return; 1023 } 1024 1025 // Otherwise, the condition did not fold, or we couldn't elide it. Just 1026 // emit the conditional branch. 1027 auto ThenBlock = CGF.createBasicBlock("omp_if.then"); 1028 auto ElseBlock = CGF.createBasicBlock("omp_if.else"); 1029 auto ContBlock = CGF.createBasicBlock("omp_if.end"); 1030 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 1031 1032 // Emit the 'then' code. 1033 CGF.EmitBlock(ThenBlock); 1034 { 1035 CodeGenFunction::RunCleanupsScope ThenScope(CGF); 1036 ThenGen(CGF); 1037 } 1038 CGF.EmitBranch(ContBlock); 1039 // Emit the 'else' code if present. 1040 { 1041 // There is no need to emit line number for unconditional branch. 1042 auto NL = ApplyDebugLocation::CreateEmpty(CGF); 1043 CGF.EmitBlock(ElseBlock); 1044 } 1045 { 1046 CodeGenFunction::RunCleanupsScope ThenScope(CGF); 1047 ElseGen(CGF); 1048 } 1049 { 1050 // There is no need to emit line number for unconditional branch. 1051 auto NL = ApplyDebugLocation::CreateEmpty(CGF); 1052 CGF.EmitBranch(ContBlock); 1053 } 1054 // Emit the continuation block for code after the if. 1055 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 1056 } 1057 1058 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 1059 llvm::Value *OutlinedFn, 1060 llvm::Value *CapturedStruct, 1061 const Expr *IfCond) { 1062 auto *RTLoc = emitUpdateLocation(CGF, Loc); 1063 auto &&ThenGen = 1064 [this, OutlinedFn, CapturedStruct, RTLoc](CodeGenFunction &CGF) { 1065 // Build call __kmpc_fork_call(loc, 1, microtask, 1066 // captured_struct/*context*/) 1067 llvm::Value *Args[] = { 1068 RTLoc, 1069 CGF.Builder.getInt32( 1070 1), // Number of arguments after 'microtask' argument 1071 // (there is only one additional argument - 'context') 1072 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()), 1073 CGF.EmitCastToVoidPtr(CapturedStruct)}; 1074 auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call); 1075 CGF.EmitRuntimeCall(RTLFn, Args); 1076 }; 1077 auto &&ElseGen = [this, OutlinedFn, CapturedStruct, RTLoc, Loc]( 1078 CodeGenFunction &CGF) { 1079 auto ThreadID = getThreadID(CGF, Loc); 1080 // Build calls: 1081 // __kmpc_serialized_parallel(&Loc, GTid); 1082 llvm::Value *Args[] = {RTLoc, ThreadID}; 1083 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), 1084 Args); 1085 1086 // OutlinedFn(>id, &zero, CapturedStruct); 1087 auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc); 1088 auto Int32Ty = CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, 1089 /*Signed*/ true); 1090 auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr"); 1091 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 1092 llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct}; 1093 CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); 1094 1095 // __kmpc_end_serialized_parallel(&Loc, GTid); 1096 llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID}; 1097 CGF.EmitRuntimeCall( 1098 createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs); 1099 }; 1100 if (IfCond) { 1101 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 1102 } else { 1103 CodeGenFunction::RunCleanupsScope Scope(CGF); 1104 ThenGen(CGF); 1105 } 1106 } 1107 1108 // If we're inside an (outlined) parallel region, use the region info's 1109 // thread-ID variable (it is passed in a first argument of the outlined function 1110 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 1111 // regular serial code region, get thread ID by calling kmp_int32 1112 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 1113 // return the address of that temp. 1114 llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 1115 SourceLocation Loc) { 1116 if (auto OMPRegionInfo = 1117 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 1118 if (OMPRegionInfo->getThreadIDVariable()) 1119 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 1120 1121 auto ThreadID = getThreadID(CGF, Loc); 1122 auto Int32Ty = 1123 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 1124 auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 1125 CGF.EmitStoreOfScalar(ThreadID, 1126 CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty)); 1127 1128 return ThreadIDTemp; 1129 } 1130 1131 llvm::Constant * 1132 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, 1133 const llvm::Twine &Name) { 1134 SmallString<256> Buffer; 1135 llvm::raw_svector_ostream Out(Buffer); 1136 Out << Name; 1137 auto RuntimeName = Out.str(); 1138 auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first; 1139 if (Elem.second) { 1140 assert(Elem.second->getType()->getPointerElementType() == Ty && 1141 "OMP internal variable has different type than requested"); 1142 return &*Elem.second; 1143 } 1144 1145 return Elem.second = new llvm::GlobalVariable( 1146 CGM.getModule(), Ty, /*IsConstant*/ false, 1147 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 1148 Elem.first()); 1149 } 1150 1151 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 1152 llvm::Twine Name(".gomp_critical_user_", CriticalName); 1153 return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); 1154 } 1155 1156 namespace { 1157 class CallEndCleanup : public EHScopeStack::Cleanup { 1158 public: 1159 typedef ArrayRef<llvm::Value *> CleanupValuesTy; 1160 private: 1161 llvm::Value *Callee; 1162 llvm::SmallVector<llvm::Value *, 8> Args; 1163 1164 public: 1165 CallEndCleanup(llvm::Value *Callee, CleanupValuesTy Args) 1166 : Callee(Callee), Args(Args.begin(), Args.end()) {} 1167 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 1168 CGF.EmitRuntimeCall(Callee, Args); 1169 } 1170 }; 1171 } // namespace 1172 1173 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 1174 StringRef CriticalName, 1175 const RegionCodeGenTy &CriticalOpGen, 1176 SourceLocation Loc) { 1177 // __kmpc_critical(ident_t *, gtid, Lock); 1178 // CriticalOpGen(); 1179 // __kmpc_end_critical(ident_t *, gtid, Lock); 1180 // Prepare arguments and build a call to __kmpc_critical 1181 { 1182 CodeGenFunction::RunCleanupsScope Scope(CGF); 1183 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1184 getCriticalRegionLock(CriticalName)}; 1185 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args); 1186 // Build a call to __kmpc_end_critical 1187 CGF.EHStack.pushCleanup<CallEndCleanup>( 1188 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical), 1189 llvm::makeArrayRef(Args)); 1190 emitInlinedDirective(CGF, CriticalOpGen); 1191 } 1192 } 1193 1194 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond, 1195 const RegionCodeGenTy &BodyOpGen) { 1196 llvm::Value *CallBool = CGF.EmitScalarConversion( 1197 IfCond, 1198 CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true), 1199 CGF.getContext().BoolTy); 1200 1201 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 1202 auto *ContBlock = CGF.createBasicBlock("omp_if.end"); 1203 // Generate the branch (If-stmt) 1204 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 1205 CGF.EmitBlock(ThenBlock); 1206 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, BodyOpGen); 1207 // Emit the rest of bblocks/branches 1208 CGF.EmitBranch(ContBlock); 1209 CGF.EmitBlock(ContBlock, true); 1210 } 1211 1212 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 1213 const RegionCodeGenTy &MasterOpGen, 1214 SourceLocation Loc) { 1215 // if(__kmpc_master(ident_t *, gtid)) { 1216 // MasterOpGen(); 1217 // __kmpc_end_master(ident_t *, gtid); 1218 // } 1219 // Prepare arguments and build a call to __kmpc_master 1220 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1221 auto *IsMaster = 1222 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args); 1223 emitIfStmt(CGF, IsMaster, [&](CodeGenFunction &CGF) -> void { 1224 CodeGenFunction::RunCleanupsScope Scope(CGF); 1225 CGF.EHStack.pushCleanup<CallEndCleanup>( 1226 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master), 1227 llvm::makeArrayRef(Args)); 1228 MasterOpGen(CGF); 1229 }); 1230 } 1231 1232 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 1233 SourceLocation Loc) { 1234 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 1235 llvm::Value *Args[] = { 1236 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1237 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 1238 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 1239 } 1240 1241 static llvm::Value *emitCopyprivateCopyFunction( 1242 CodeGenModule &CGM, llvm::Type *ArgsType, 1243 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 1244 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) { 1245 auto &C = CGM.getContext(); 1246 // void copy_func(void *LHSArg, void *RHSArg); 1247 FunctionArgList Args; 1248 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 1249 C.VoidPtrTy); 1250 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 1251 C.VoidPtrTy); 1252 Args.push_back(&LHSArg); 1253 Args.push_back(&RHSArg); 1254 FunctionType::ExtInfo EI; 1255 auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration( 1256 C.VoidTy, Args, EI, /*isVariadic=*/false); 1257 auto *Fn = llvm::Function::Create( 1258 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 1259 ".omp.copyprivate.copy_func", &CGM.getModule()); 1260 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn); 1261 CodeGenFunction CGF(CGM); 1262 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 1263 // Dest = (void*[n])(LHSArg); 1264 // Src = (void*[n])(RHSArg); 1265 auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1266 CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg), 1267 CGF.PointerAlignInBytes), 1268 ArgsType); 1269 auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1270 CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg), 1271 CGF.PointerAlignInBytes), 1272 ArgsType); 1273 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 1274 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 1275 // ... 1276 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 1277 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 1278 auto *DestAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1279 CGF.Builder.CreateAlignedLoad( 1280 CGF.Builder.CreateStructGEP(nullptr, LHS, I), 1281 CGM.PointerAlignInBytes), 1282 CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType()))); 1283 auto *SrcAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1284 CGF.Builder.CreateAlignedLoad( 1285 CGF.Builder.CreateStructGEP(nullptr, RHS, I), 1286 CGM.PointerAlignInBytes), 1287 CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType()))); 1288 CGF.EmitOMPCopy(CGF, CopyprivateVars[I]->getType(), DestAddr, SrcAddr, 1289 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()), 1290 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()), 1291 AssignmentOps[I]); 1292 } 1293 CGF.FinishFunction(); 1294 return Fn; 1295 } 1296 1297 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 1298 const RegionCodeGenTy &SingleOpGen, 1299 SourceLocation Loc, 1300 ArrayRef<const Expr *> CopyprivateVars, 1301 ArrayRef<const Expr *> SrcExprs, 1302 ArrayRef<const Expr *> DstExprs, 1303 ArrayRef<const Expr *> AssignmentOps) { 1304 assert(CopyprivateVars.size() == SrcExprs.size() && 1305 CopyprivateVars.size() == DstExprs.size() && 1306 CopyprivateVars.size() == AssignmentOps.size()); 1307 auto &C = CGM.getContext(); 1308 // int32 did_it = 0; 1309 // if(__kmpc_single(ident_t *, gtid)) { 1310 // SingleOpGen(); 1311 // __kmpc_end_single(ident_t *, gtid); 1312 // did_it = 1; 1313 // } 1314 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 1315 // <copy_func>, did_it); 1316 1317 llvm::AllocaInst *DidIt = nullptr; 1318 if (!CopyprivateVars.empty()) { 1319 // int32 did_it = 0; 1320 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1321 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 1322 CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(0), DidIt, 1323 DidIt->getAlignment()); 1324 } 1325 // Prepare arguments and build a call to __kmpc_single 1326 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1327 auto *IsSingle = 1328 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args); 1329 emitIfStmt(CGF, IsSingle, [&](CodeGenFunction &CGF) -> void { 1330 CodeGenFunction::RunCleanupsScope Scope(CGF); 1331 CGF.EHStack.pushCleanup<CallEndCleanup>( 1332 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single), 1333 llvm::makeArrayRef(Args)); 1334 SingleOpGen(CGF); 1335 if (DidIt) { 1336 // did_it = 1; 1337 CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt, 1338 DidIt->getAlignment()); 1339 } 1340 }); 1341 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 1342 // <copy_func>, did_it); 1343 if (DidIt) { 1344 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 1345 auto CopyprivateArrayTy = 1346 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 1347 /*IndexTypeQuals=*/0); 1348 // Create a list of all private variables for copyprivate. 1349 auto *CopyprivateList = 1350 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 1351 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 1352 auto *Elem = CGF.Builder.CreateStructGEP( 1353 CopyprivateList->getAllocatedType(), CopyprivateList, I); 1354 CGF.Builder.CreateAlignedStore( 1355 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1356 CGF.EmitLValue(CopyprivateVars[I]).getAddress(), CGF.VoidPtrTy), 1357 Elem, CGM.PointerAlignInBytes); 1358 } 1359 // Build function that copies private values from single region to all other 1360 // threads in the corresponding parallel region. 1361 auto *CpyFn = emitCopyprivateCopyFunction( 1362 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 1363 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps); 1364 auto *BufSize = llvm::ConstantInt::get( 1365 CGM.SizeTy, C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity()); 1366 auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 1367 CGF.VoidPtrTy); 1368 auto *DidItVal = 1369 CGF.Builder.CreateAlignedLoad(DidIt, CGF.PointerAlignInBytes); 1370 llvm::Value *Args[] = { 1371 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 1372 getThreadID(CGF, Loc), // i32 <gtid> 1373 BufSize, // size_t <buf_size> 1374 CL, // void *<copyprivate list> 1375 CpyFn, // void (*) (void *, void *) <copy_func> 1376 DidItVal // i32 did_it 1377 }; 1378 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 1379 } 1380 } 1381 1382 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 1383 const RegionCodeGenTy &OrderedOpGen, 1384 SourceLocation Loc) { 1385 // __kmpc_ordered(ident_t *, gtid); 1386 // OrderedOpGen(); 1387 // __kmpc_end_ordered(ident_t *, gtid); 1388 // Prepare arguments and build a call to __kmpc_ordered 1389 { 1390 CodeGenFunction::RunCleanupsScope Scope(CGF); 1391 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1392 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args); 1393 // Build a call to __kmpc_end_ordered 1394 CGF.EHStack.pushCleanup<CallEndCleanup>( 1395 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered), 1396 llvm::makeArrayRef(Args)); 1397 emitInlinedDirective(CGF, OrderedOpGen); 1398 } 1399 } 1400 1401 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 1402 OpenMPDirectiveKind Kind) { 1403 // Build call __kmpc_cancel_barrier(loc, thread_id); 1404 OpenMPLocationFlags Flags = OMP_IDENT_KMPC; 1405 if (Kind == OMPD_for) { 1406 Flags = 1407 static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR); 1408 } else if (Kind == OMPD_sections) { 1409 Flags = static_cast<OpenMPLocationFlags>(Flags | 1410 OMP_IDENT_BARRIER_IMPL_SECTIONS); 1411 } else if (Kind == OMPD_single) { 1412 Flags = 1413 static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE); 1414 } else if (Kind == OMPD_barrier) { 1415 Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL); 1416 } else { 1417 Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL); 1418 } 1419 // Build call __kmpc_cancel_barrier(loc, thread_id); 1420 // Replace __kmpc_barrier() function by __kmpc_cancel_barrier() because this 1421 // one provides the same functionality and adds initial support for 1422 // cancellation constructs introduced in OpenMP 4.0. __kmpc_cancel_barrier() 1423 // is provided default by the runtime library so it safe to make such 1424 // replacement. 1425 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 1426 getThreadID(CGF, Loc)}; 1427 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 1428 } 1429 1430 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from 1431 /// the enum sched_type in kmp.h). 1432 enum OpenMPSchedType { 1433 /// \brief Lower bound for default (unordered) versions. 1434 OMP_sch_lower = 32, 1435 OMP_sch_static_chunked = 33, 1436 OMP_sch_static = 34, 1437 OMP_sch_dynamic_chunked = 35, 1438 OMP_sch_guided_chunked = 36, 1439 OMP_sch_runtime = 37, 1440 OMP_sch_auto = 38, 1441 /// \brief Lower bound for 'ordered' versions. 1442 OMP_ord_lower = 64, 1443 /// \brief Lower bound for 'nomerge' versions. 1444 OMP_nm_lower = 160, 1445 }; 1446 1447 /// \brief Map the OpenMP loop schedule to the runtime enumeration. 1448 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 1449 bool Chunked) { 1450 switch (ScheduleKind) { 1451 case OMPC_SCHEDULE_static: 1452 return Chunked ? OMP_sch_static_chunked : OMP_sch_static; 1453 case OMPC_SCHEDULE_dynamic: 1454 return OMP_sch_dynamic_chunked; 1455 case OMPC_SCHEDULE_guided: 1456 return OMP_sch_guided_chunked; 1457 case OMPC_SCHEDULE_auto: 1458 return OMP_sch_auto; 1459 case OMPC_SCHEDULE_runtime: 1460 return OMP_sch_runtime; 1461 case OMPC_SCHEDULE_unknown: 1462 assert(!Chunked && "chunk was specified but schedule kind not known"); 1463 return OMP_sch_static; 1464 } 1465 llvm_unreachable("Unexpected runtime schedule"); 1466 } 1467 1468 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 1469 bool Chunked) const { 1470 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 1471 return Schedule == OMP_sch_static; 1472 } 1473 1474 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 1475 auto Schedule = getRuntimeSchedule(ScheduleKind, /* Chunked */ false); 1476 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 1477 return Schedule != OMP_sch_static; 1478 } 1479 1480 void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc, 1481 OpenMPScheduleClauseKind ScheduleKind, 1482 unsigned IVSize, bool IVSigned, 1483 llvm::Value *IL, llvm::Value *LB, 1484 llvm::Value *UB, llvm::Value *ST, 1485 llvm::Value *Chunk) { 1486 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr); 1487 if (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked) { 1488 // Call __kmpc_dispatch_init( 1489 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 1490 // kmp_int[32|64] lower, kmp_int[32|64] upper, 1491 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 1492 1493 // If the Chunk was not specified in the clause - use default value 1. 1494 if (Chunk == nullptr) 1495 Chunk = CGF.Builder.getIntN(IVSize, 1); 1496 llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1497 getThreadID(CGF, Loc), 1498 CGF.Builder.getInt32(Schedule), // Schedule type 1499 CGF.Builder.getIntN(IVSize, 0), // Lower 1500 UB, // Upper 1501 CGF.Builder.getIntN(IVSize, 1), // Stride 1502 Chunk // Chunk 1503 }; 1504 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 1505 } else { 1506 // Call __kmpc_for_static_init( 1507 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 1508 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 1509 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 1510 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 1511 if (Chunk == nullptr) { 1512 assert(Schedule == OMP_sch_static && 1513 "expected static non-chunked schedule"); 1514 // If the Chunk was not specified in the clause - use default value 1. 1515 Chunk = CGF.Builder.getIntN(IVSize, 1); 1516 } else 1517 assert(Schedule == OMP_sch_static_chunked && 1518 "expected static chunked schedule"); 1519 llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1520 getThreadID(CGF, Loc), 1521 CGF.Builder.getInt32(Schedule), // Schedule type 1522 IL, // &isLastIter 1523 LB, // &LB 1524 UB, // &UB 1525 ST, // &Stride 1526 CGF.Builder.getIntN(IVSize, 1), // Incr 1527 Chunk // Chunk 1528 }; 1529 CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args); 1530 } 1531 } 1532 1533 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 1534 SourceLocation Loc) { 1535 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 1536 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1537 getThreadID(CGF, Loc)}; 1538 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 1539 Args); 1540 } 1541 1542 void CGOpenMPRuntime::emitForOrderedDynamicIterationEnd(CodeGenFunction &CGF, 1543 SourceLocation Loc, 1544 unsigned IVSize, 1545 bool IVSigned) { 1546 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 1547 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1548 getThreadID(CGF, Loc)}; 1549 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 1550 } 1551 1552 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 1553 SourceLocation Loc, unsigned IVSize, 1554 bool IVSigned, llvm::Value *IL, 1555 llvm::Value *LB, llvm::Value *UB, 1556 llvm::Value *ST) { 1557 // Call __kmpc_dispatch_next( 1558 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 1559 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 1560 // kmp_int[32|64] *p_stride); 1561 llvm::Value *Args[] = { 1562 emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc), 1563 IL, // &isLastIter 1564 LB, // &Lower 1565 UB, // &Upper 1566 ST // &Stride 1567 }; 1568 llvm::Value *Call = 1569 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 1570 return CGF.EmitScalarConversion( 1571 Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true), 1572 CGF.getContext().BoolTy); 1573 } 1574 1575 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 1576 llvm::Value *NumThreads, 1577 SourceLocation Loc) { 1578 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 1579 llvm::Value *Args[] = { 1580 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1581 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 1582 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 1583 Args); 1584 } 1585 1586 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 1587 SourceLocation Loc) { 1588 // Build call void __kmpc_flush(ident_t *loc) 1589 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 1590 emitUpdateLocation(CGF, Loc)); 1591 } 1592 1593 namespace { 1594 /// \brief Indexes of fields for type kmp_task_t. 1595 enum KmpTaskTFields { 1596 /// \brief List of shared variables. 1597 KmpTaskTShareds, 1598 /// \brief Task routine. 1599 KmpTaskTRoutine, 1600 /// \brief Partition id for the untied tasks. 1601 KmpTaskTPartId, 1602 /// \brief Function with call of destructors for private variables. 1603 KmpTaskTDestructors, 1604 /// \brief Record with list of all private/firstprivate copies for the task 1605 /// directive. 1606 KmpTaskTPrivates, 1607 }; 1608 } // namespace 1609 1610 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 1611 if (!KmpRoutineEntryPtrTy) { 1612 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 1613 auto &C = CGM.getContext(); 1614 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 1615 FunctionProtoType::ExtProtoInfo EPI; 1616 KmpRoutineEntryPtrQTy = C.getPointerType( 1617 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 1618 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 1619 } 1620 } 1621 1622 static void addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1623 QualType FieldTy) { 1624 auto *Field = FieldDecl::Create( 1625 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1626 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1627 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1628 Field->setAccess(AS_public); 1629 DC->addDecl(Field); 1630 } 1631 1632 namespace { 1633 typedef std::pair<CharUnits /*Align*/, 1634 std::pair<const VarDecl *, const VarDecl *>> VDPair; 1635 } // namespace 1636 1637 static RecordDecl *createPrivatesRecordDecl(CodeGenModule &CGM, 1638 const ArrayRef<VDPair> Privates) { 1639 if (!Privates.empty()) { 1640 auto &C = CGM.getContext(); 1641 // Build struct .kmp_privates_t. { 1642 // /* private vars */ 1643 // }; 1644 auto *RD = C.buildImplicitRecord(".kmp_privates.t"); 1645 RD->startDefinition(); 1646 for (auto &&Pair : Privates) { 1647 addFieldToRecordDecl(C, RD, 1648 Pair.second.first->getType().getNonReferenceType()); 1649 } 1650 // TODO: add firstprivate fields. 1651 RD->completeDefinition(); 1652 return RD; 1653 } 1654 return nullptr; 1655 } 1656 1657 static RecordDecl *createKmpTaskTRecordDecl(CodeGenModule &CGM, 1658 QualType KmpInt32Ty, 1659 QualType KmpRoutineEntryPointerQTy, 1660 const ArrayRef<VDPair> Privates) { 1661 auto &C = CGM.getContext(); 1662 // Build struct kmp_task_t { 1663 // void * shareds; 1664 // kmp_routine_entry_t routine; 1665 // kmp_int32 part_id; 1666 // kmp_routine_entry_t destructors; 1667 // /* private vars */ 1668 // }; 1669 auto *RD = C.buildImplicitRecord("kmp_task_t"); 1670 RD->startDefinition(); 1671 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1672 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 1673 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1674 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 1675 if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) { 1676 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 1677 } 1678 RD->completeDefinition(); 1679 return RD; 1680 } 1681 1682 /// \brief Emit a proxy function which accepts kmp_task_t as the second 1683 /// argument. 1684 /// \code 1685 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 1686 /// TaskFunction(gtid, tt->part_id, tt->shareds); 1687 /// return 0; 1688 /// } 1689 /// \endcode 1690 static llvm::Value * 1691 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 1692 QualType KmpInt32Ty, QualType KmpTaskTPtrQTy, 1693 QualType SharedsPtrTy, llvm::Value *TaskFunction, 1694 llvm::Type *KmpTaskTTy) { 1695 auto &C = CGM.getContext(); 1696 FunctionArgList Args; 1697 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); 1698 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, 1699 /*Id=*/nullptr, KmpTaskTPtrQTy); 1700 Args.push_back(&GtidArg); 1701 Args.push_back(&TaskTypeArg); 1702 FunctionType::ExtInfo Info; 1703 auto &TaskEntryFnInfo = 1704 CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info, 1705 /*isVariadic=*/false); 1706 auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 1707 auto *TaskEntry = 1708 llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage, 1709 ".omp_task_entry.", &CGM.getModule()); 1710 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry); 1711 CodeGenFunction CGF(CGM); 1712 CGF.disableDebugInfo(); 1713 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); 1714 1715 // TaskFunction(gtid, tt->part_id, tt->shareds); 1716 auto *GtidParam = CGF.EmitLoadOfScalar( 1717 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, 1718 C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc); 1719 auto TaskTypeArgAddr = CGF.EmitLoadOfScalar( 1720 CGF.GetAddrOfLocalVar(&TaskTypeArg), /*Volatile=*/false, 1721 CGM.PointerAlignInBytes, KmpTaskTPtrQTy, Loc); 1722 auto *PartidPtr = CGF.Builder.CreateStructGEP(KmpTaskTTy, TaskTypeArgAddr, 1723 /*Idx=*/KmpTaskTPartId); 1724 auto *PartidParam = CGF.EmitLoadOfScalar( 1725 PartidPtr, /*Volatile=*/false, 1726 C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc); 1727 auto *SharedsPtr = CGF.Builder.CreateStructGEP(KmpTaskTTy, TaskTypeArgAddr, 1728 /*Idx=*/KmpTaskTShareds); 1729 auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1730 CGF.EmitLoadOfScalar(SharedsPtr, /*Volatile=*/false, 1731 CGM.PointerAlignInBytes, C.VoidPtrTy, Loc), 1732 CGF.ConvertTypeForMem(SharedsPtrTy)); 1733 1734 llvm::Value *CallArgs[] = {GtidParam, PartidParam, SharedsParam}; 1735 CGF.EmitCallOrInvoke(TaskFunction, CallArgs); 1736 CGF.EmitStoreThroughLValue( 1737 RValue::get(CGF.Builder.getInt32(/*C=*/0)), 1738 CGF.MakeNaturalAlignAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 1739 CGF.FinishFunction(); 1740 return TaskEntry; 1741 } 1742 1743 static llvm::Value * 1744 emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, 1745 QualType KmpInt32Ty, QualType KmpTaskTPtrQTy, 1746 QualType KmpTaskQTy, RecordDecl *KmpTaskQTyRD) { 1747 auto &C = CGM.getContext(); 1748 FunctionArgList Args; 1749 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); 1750 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, 1751 /*Id=*/nullptr, KmpTaskTPtrQTy); 1752 Args.push_back(&GtidArg); 1753 Args.push_back(&TaskTypeArg); 1754 FunctionType::ExtInfo Info; 1755 auto &DestructorFnInfo = 1756 CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info, 1757 /*isVariadic=*/false); 1758 auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo); 1759 auto *DestructorFn = 1760 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 1761 ".omp_task_destructor.", &CGM.getModule()); 1762 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, DestructorFnInfo, DestructorFn); 1763 CodeGenFunction CGF(CGM); 1764 CGF.disableDebugInfo(); 1765 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 1766 Args); 1767 1768 auto *TaskTypeArgAddr = CGF.EmitLoadOfScalar( 1769 CGF.GetAddrOfLocalVar(&TaskTypeArg), /*Volatile=*/false, 1770 CGM.PointerAlignInBytes, KmpTaskTPtrQTy, Loc); 1771 LValue Base = CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskQTy); 1772 auto FI = std::next(KmpTaskQTyRD->field_begin(), KmpTaskTPrivates); 1773 Base = CGF.EmitLValueForField(Base, *FI); 1774 for (auto *Field : 1775 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 1776 if (auto DtorKind = Field->getType().isDestructedType()) { 1777 auto FieldLValue = CGF.EmitLValueForField(Base, Field); 1778 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 1779 } 1780 } 1781 CGF.FinishFunction(); 1782 return DestructorFn; 1783 } 1784 1785 static int array_pod_sort_comparator(const VDPair *P1, const VDPair *P2) { 1786 return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0); 1787 } 1788 1789 void CGOpenMPRuntime::emitTaskCall( 1790 CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, 1791 bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final, 1792 llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds, 1793 const Expr *IfCond, const ArrayRef<const Expr *> PrivateVars, 1794 const ArrayRef<const Expr *> PrivateCopies) { 1795 auto &C = CGM.getContext(); 1796 llvm::SmallVector<VDPair, 8> Privates; 1797 auto I = PrivateCopies.begin(); 1798 // Aggeregate privates and sort them by the alignment. 1799 for (auto *E : PrivateVars) { 1800 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1801 Privates.push_back(std::make_pair( 1802 C.getTypeAlignInChars(VD->getType()), 1803 std::make_pair(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl())))); 1804 ++I; 1805 } 1806 llvm::array_pod_sort(Privates.begin(), Privates.end(), 1807 array_pod_sort_comparator); 1808 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1809 // Build type kmp_routine_entry_t (if not built yet). 1810 emitKmpRoutineEntryT(KmpInt32Ty); 1811 // Build particular struct kmp_task_t for the given task. 1812 auto *KmpTaskQTyRD = createKmpTaskTRecordDecl( 1813 CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy, Privates); 1814 auto KmpTaskQTy = C.getRecordType(KmpTaskQTyRD); 1815 QualType KmpTaskTPtrQTy = C.getPointerType(KmpTaskQTy); 1816 auto *KmpTaskTTy = CGF.ConvertType(KmpTaskQTy); 1817 auto *KmpTaskTPtrTy = KmpTaskTTy->getPointerTo(); 1818 auto KmpTaskTySize = CGM.getSize(C.getTypeSizeInChars(KmpTaskQTy)); 1819 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 1820 1821 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 1822 // kmp_task_t *tt); 1823 auto *TaskEntry = 1824 emitProxyTaskFunction(CGM, Loc, KmpInt32Ty, KmpTaskTPtrQTy, SharedsPtrTy, 1825 TaskFunction, KmpTaskTTy); 1826 1827 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1828 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1829 // kmp_routine_entry_t *task_entry); 1830 // Task flags. Format is taken from 1831 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h, 1832 // description of kmp_tasking_flags struct. 1833 const unsigned TiedFlag = 0x1; 1834 const unsigned FinalFlag = 0x2; 1835 unsigned Flags = Tied ? TiedFlag : 0; 1836 auto *TaskFlags = 1837 Final.getPointer() 1838 ? CGF.Builder.CreateSelect(Final.getPointer(), 1839 CGF.Builder.getInt32(FinalFlag), 1840 CGF.Builder.getInt32(/*C=*/0)) 1841 : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0); 1842 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 1843 auto SharedsSize = C.getTypeSizeInChars(SharedsTy); 1844 llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), 1845 getThreadID(CGF, Loc), TaskFlags, KmpTaskTySize, 1846 CGM.getSize(SharedsSize), 1847 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1848 TaskEntry, KmpRoutineEntryPtrTy)}; 1849 auto *NewTask = CGF.EmitRuntimeCall( 1850 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 1851 auto *NewTaskNewTaskTTy = 1852 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(NewTask, KmpTaskTPtrTy); 1853 // Fill the data in the resulting kmp_task_t record. 1854 // Copy shareds if there are any. 1855 auto *KmpTaskSharedsPtr = CGF.EmitLoadOfScalar( 1856 CGF.Builder.CreateStructGEP(KmpTaskTTy, NewTaskNewTaskTTy, 1857 /*Idx=*/KmpTaskTShareds), 1858 /*Volatile=*/false, CGM.PointerAlignInBytes, SharedsPtrTy, Loc); 1859 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) 1860 CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy); 1861 // Emit initial values for private copies (if any). 1862 bool NeedsCleanup = false; 1863 if (!Privates.empty()) { 1864 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, KmpTaskQTy); 1865 auto FI = std::next(KmpTaskQTyRD->field_begin(), KmpTaskTPrivates); 1866 Base = CGF.EmitLValueForField(Base, *FI); 1867 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 1868 LValue SharedsBase = CGF.MakeNaturalAlignAddrLValue( 1869 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1870 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 1871 SharedsTy); 1872 CodeGenFunction::CGCapturedStmtInfo CapturesInfo( 1873 cast<CapturedStmt>(*D.getAssociatedStmt())); 1874 for (auto &&Pair : Privates) { 1875 auto *VD = Pair.second.second; 1876 auto *Init = VD->getAnyInitializer(); 1877 LValue PrivateLValue = CGF.EmitLValueForField(Base, *FI); 1878 if (Init) { 1879 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 1880 } 1881 NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType(); 1882 // Copy addresses of privates to corresponding references in the list of 1883 // captured variables. 1884 // ... 1885 // tt->shareds.var_addr = &tt->privates.private_var; 1886 // ... 1887 auto *OriginalVD = Pair.second.first; 1888 auto *SharedField = CapturesInfo.lookup(OriginalVD); 1889 auto SharedRefLValue = 1890 CGF.EmitLValueForFieldInitialization(SharedsBase, SharedField); 1891 CGF.EmitStoreThroughLValue(RValue::get(PrivateLValue.getAddress()), 1892 SharedRefLValue); 1893 ++FI, ++I; 1894 } 1895 } 1896 // Provide pointer to function with destructors for privates. 1897 llvm::Value *DestructorFn = 1898 NeedsCleanup 1899 ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty, KmpTaskTPtrQTy, 1900 KmpTaskQTy, KmpTaskQTyRD) 1901 : llvm::ConstantPointerNull::get( 1902 cast<llvm::PointerType>(KmpRoutineEntryPtrTy)); 1903 CGF.Builder.CreateAlignedStore( 1904 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DestructorFn, 1905 KmpRoutineEntryPtrTy), 1906 CGF.Builder.CreateStructGEP(KmpTaskTTy, NewTaskNewTaskTTy, 1907 /*Idx=*/KmpTaskTDestructors), 1908 CGM.PointerAlignInBytes); 1909 // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() 1910 // libcall. 1911 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1912 // *new_task); 1913 auto *ThreadID = getThreadID(CGF, Loc); 1914 llvm::Value *TaskArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID, NewTask}; 1915 auto &&ThenCodeGen = [this, &TaskArgs](CodeGenFunction &CGF) { 1916 // TODO: add check for untied tasks. 1917 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1918 }; 1919 auto &&ElseCodeGen = 1920 [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry]( 1921 CodeGenFunction &CGF) { 1922 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 1923 CGF.EmitRuntimeCall( 1924 createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs); 1925 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 1926 // kmp_task_t *new_task); 1927 CGF.EHStack.pushCleanup<CallEndCleanup>( 1928 NormalAndEHCleanup, 1929 createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), 1930 llvm::makeArrayRef(TaskArgs)); 1931 1932 // Call proxy_task_entry(gtid, new_task); 1933 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 1934 CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); 1935 }; 1936 if (IfCond) { 1937 emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 1938 } else { 1939 CodeGenFunction::RunCleanupsScope Scope(CGF); 1940 ThenCodeGen(CGF); 1941 } 1942 } 1943 1944 static llvm::Value *emitReductionFunction(CodeGenModule &CGM, 1945 llvm::Type *ArgsType, 1946 ArrayRef<const Expr *> LHSExprs, 1947 ArrayRef<const Expr *> RHSExprs, 1948 ArrayRef<const Expr *> ReductionOps) { 1949 auto &C = CGM.getContext(); 1950 1951 // void reduction_func(void *LHSArg, void *RHSArg); 1952 FunctionArgList Args; 1953 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 1954 C.VoidPtrTy); 1955 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 1956 C.VoidPtrTy); 1957 Args.push_back(&LHSArg); 1958 Args.push_back(&RHSArg); 1959 FunctionType::ExtInfo EI; 1960 auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration( 1961 C.VoidTy, Args, EI, /*isVariadic=*/false); 1962 auto *Fn = llvm::Function::Create( 1963 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 1964 ".omp.reduction.reduction_func", &CGM.getModule()); 1965 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn); 1966 CodeGenFunction CGF(CGM); 1967 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 1968 1969 // Dst = (void*[n])(LHSArg); 1970 // Src = (void*[n])(RHSArg); 1971 auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1972 CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg), 1973 CGF.PointerAlignInBytes), 1974 ArgsType); 1975 auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1976 CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg), 1977 CGF.PointerAlignInBytes), 1978 ArgsType); 1979 1980 // ... 1981 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 1982 // ... 1983 CodeGenFunction::OMPPrivateScope Scope(CGF); 1984 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I) { 1985 Scope.addPrivate( 1986 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()), 1987 [&]() -> llvm::Value *{ 1988 return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1989 CGF.Builder.CreateAlignedLoad( 1990 CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, RHS, I), 1991 CGM.PointerAlignInBytes), 1992 CGF.ConvertTypeForMem(C.getPointerType(RHSExprs[I]->getType()))); 1993 }); 1994 Scope.addPrivate( 1995 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()), 1996 [&]() -> llvm::Value *{ 1997 return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1998 CGF.Builder.CreateAlignedLoad( 1999 CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, LHS, I), 2000 CGM.PointerAlignInBytes), 2001 CGF.ConvertTypeForMem(C.getPointerType(LHSExprs[I]->getType()))); 2002 }); 2003 } 2004 Scope.Privatize(); 2005 for (auto *E : ReductionOps) { 2006 CGF.EmitIgnoredExpr(E); 2007 } 2008 Scope.ForceCleanup(); 2009 CGF.FinishFunction(); 2010 return Fn; 2011 } 2012 2013 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 2014 ArrayRef<const Expr *> LHSExprs, 2015 ArrayRef<const Expr *> RHSExprs, 2016 ArrayRef<const Expr *> ReductionOps, 2017 bool WithNowait) { 2018 // Next code should be emitted for reduction: 2019 // 2020 // static kmp_critical_name lock = { 0 }; 2021 // 2022 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 2023 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 2024 // ... 2025 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 2026 // *(Type<n>-1*)rhs[<n>-1]); 2027 // } 2028 // 2029 // ... 2030 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 2031 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 2032 // RedList, reduce_func, &<lock>)) { 2033 // case 1: 2034 // ... 2035 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 2036 // ... 2037 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 2038 // break; 2039 // case 2: 2040 // ... 2041 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 2042 // ... 2043 // break; 2044 // default:; 2045 // } 2046 2047 auto &C = CGM.getContext(); 2048 2049 // 1. Build a list of reduction variables. 2050 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 2051 llvm::APInt ArraySize(/*unsigned int numBits=*/32, RHSExprs.size()); 2052 QualType ReductionArrayTy = 2053 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 2054 /*IndexTypeQuals=*/0); 2055 auto *ReductionList = 2056 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 2057 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I) { 2058 auto *Elem = CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, ReductionList, I); 2059 CGF.Builder.CreateAlignedStore( 2060 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2061 CGF.EmitLValue(RHSExprs[I]).getAddress(), CGF.VoidPtrTy), 2062 Elem, CGM.PointerAlignInBytes); 2063 } 2064 2065 // 2. Emit reduce_func(). 2066 auto *ReductionFn = emitReductionFunction( 2067 CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), LHSExprs, 2068 RHSExprs, ReductionOps); 2069 2070 // 3. Create static kmp_critical_name lock = { 0 }; 2071 auto *Lock = getCriticalRegionLock(".reduction"); 2072 2073 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 2074 // RedList, reduce_func, &<lock>); 2075 auto *IdentTLoc = emitUpdateLocation( 2076 CGF, Loc, 2077 static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE)); 2078 auto *ThreadId = getThreadID(CGF, Loc); 2079 auto *ReductionArrayTySize = llvm::ConstantInt::get( 2080 CGM.SizeTy, C.getTypeSizeInChars(ReductionArrayTy).getQuantity()); 2081 auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, 2082 CGF.VoidPtrTy); 2083 llvm::Value *Args[] = { 2084 IdentTLoc, // ident_t *<loc> 2085 ThreadId, // i32 <gtid> 2086 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 2087 ReductionArrayTySize, // size_type sizeof(RedList) 2088 RL, // void *RedList 2089 ReductionFn, // void (*) (void *, void *) <reduce_func> 2090 Lock // kmp_critical_name *&<lock> 2091 }; 2092 auto Res = CGF.EmitRuntimeCall( 2093 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 2094 : OMPRTL__kmpc_reduce), 2095 Args); 2096 2097 // 5. Build switch(res) 2098 auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 2099 auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 2100 2101 // 6. Build case 1: 2102 // ... 2103 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 2104 // ... 2105 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 2106 // break; 2107 auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 2108 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 2109 CGF.EmitBlock(Case1BB); 2110 2111 { 2112 CodeGenFunction::RunCleanupsScope Scope(CGF); 2113 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 2114 llvm::Value *EndArgs[] = { 2115 IdentTLoc, // ident_t *<loc> 2116 ThreadId, // i32 <gtid> 2117 Lock // kmp_critical_name *&<lock> 2118 }; 2119 CGF.EHStack.pushCleanup<CallEndCleanup>( 2120 NormalAndEHCleanup, 2121 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait 2122 : OMPRTL__kmpc_end_reduce), 2123 llvm::makeArrayRef(EndArgs)); 2124 for (auto *E : ReductionOps) { 2125 CGF.EmitIgnoredExpr(E); 2126 } 2127 } 2128 2129 CGF.EmitBranch(DefaultBB); 2130 2131 // 7. Build case 2: 2132 // ... 2133 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 2134 // ... 2135 // break; 2136 auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 2137 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 2138 CGF.EmitBlock(Case2BB); 2139 2140 { 2141 CodeGenFunction::RunCleanupsScope Scope(CGF); 2142 auto I = LHSExprs.begin(); 2143 for (auto *E : ReductionOps) { 2144 const Expr *XExpr = nullptr; 2145 const Expr *EExpr = nullptr; 2146 const Expr *UpExpr = nullptr; 2147 BinaryOperatorKind BO = BO_Comma; 2148 // Try to emit update expression as a simple atomic. 2149 if (auto *ACO = dyn_cast<AbstractConditionalOperator>(E)) { 2150 // If this is a conditional operator, analyze it's condition for 2151 // min/max reduction operator. 2152 E = ACO->getCond(); 2153 } 2154 if (auto *BO = dyn_cast<BinaryOperator>(E)) { 2155 if (BO->getOpcode() == BO_Assign) { 2156 XExpr = BO->getLHS(); 2157 UpExpr = BO->getRHS(); 2158 } 2159 } 2160 // Analyze RHS part of the whole expression. 2161 if (UpExpr) { 2162 if (auto *BORHS = 2163 dyn_cast<BinaryOperator>(UpExpr->IgnoreParenImpCasts())) { 2164 EExpr = BORHS->getRHS(); 2165 BO = BORHS->getOpcode(); 2166 } 2167 } 2168 if (XExpr) { 2169 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); 2170 LValue X = CGF.EmitLValue(XExpr); 2171 RValue E; 2172 if (EExpr) 2173 E = CGF.EmitAnyExpr(EExpr); 2174 CGF.EmitOMPAtomicSimpleUpdateExpr( 2175 X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc, 2176 [&CGF, UpExpr, VD](RValue XRValue) { 2177 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 2178 PrivateScope.addPrivate( 2179 VD, [&CGF, VD, XRValue]() -> llvm::Value *{ 2180 auto *LHSTemp = CGF.CreateMemTemp(VD->getType()); 2181 CGF.EmitStoreThroughLValue( 2182 XRValue, 2183 CGF.MakeNaturalAlignAddrLValue(LHSTemp, VD->getType())); 2184 return LHSTemp; 2185 }); 2186 (void)PrivateScope.Privatize(); 2187 return CGF.EmitAnyExpr(UpExpr); 2188 }); 2189 } else { 2190 // Emit as a critical region. 2191 emitCriticalRegion(CGF, ".atomic_reduction", [E](CodeGenFunction &CGF) { 2192 CGF.EmitIgnoredExpr(E); 2193 }, Loc); 2194 } 2195 ++I; 2196 } 2197 } 2198 2199 CGF.EmitBranch(DefaultBB); 2200 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 2201 } 2202 2203 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 2204 SourceLocation Loc) { 2205 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 2206 // global_tid); 2207 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2208 // Ignore return result until untied tasks are supported. 2209 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 2210 } 2211 2212 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 2213 const RegionCodeGenTy &CodeGen) { 2214 InlinedOpenMPRegionRAII Region(CGF, CodeGen); 2215 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 2216 } 2217 2218